Compare commits

..

6 Commits

Author SHA1 Message Date
hugogogo
512ba9b5f4 init parser 2026-04-30 23:19:14 +02:00
hugogogo
7a30dcc345 lexer types equal and factor 2026-04-30 22:48:51 +02:00
hugogogo
26fa8025ef lexer complete with luke memory solution 2026-04-29 12:41:19 +02:00
hugogogo
9ced220c00 add atof 2026-04-29 00:58:33 +02:00
hugogogo
52c76767bd all lexer except atoi 2026-04-29 00:27:46 +02:00
hugogogo
5108571deb lexer adding is_number 2026-04-28 23:42:25 +02:00
10 changed files with 341 additions and 50 deletions

View File

@@ -36,11 +36,13 @@ D_LIB = ./libft
D_HEADERS = ./headers D_HEADERS = ./headers
HEADERS = computorv1.h \ HEADERS = computorv1.h \
errors.h \ errors.h \
lexer.h lexer.h \
parser.h
D_SRCS = ./src D_SRCS = ./src
SRCS = computorv1.c \ SRCS = computorv1.c \
errors.c \ errors.c \
lexer.c lexer.c \
parser.c
# COMPILATION CONFIG : # COMPILATION CONFIG :
CC = gcc CC = gcc
@@ -95,7 +97,7 @@ $(NAME): $(OBJS)
run: $(NAME) run: $(NAME)
@echo $(YELLOW)"run"$(RESET) @echo $(YELLOW)"run"$(RESET)
@./$(NAME) "3 * x^2 + 1 * x^1 - 2 * x^0" @./$(NAME) "3.4 * x^2 + 1 * x^1 - 2 * x^0 = 5.123 * x^1"
clean: clean:
$(RM_OBJS) $(RM_OBJS)

View File

@@ -2,5 +2,9 @@
#define COMPUTORV1_H #define COMPUTORV1_H
#include "libft.h" #include "libft.h"
#include "lexer.h"
#include "parser.h"
#include "errors.h"
#include <stdio.h> // tmp for printf, for float debug
#endif #endif

View File

@@ -7,6 +7,7 @@ typedef enum
{ {
ERROR_BASIC = 0, ERROR_BASIC = 0,
ERROR_UNKNOWN_TOKEN = -1, ERROR_UNKNOWN_TOKEN = -1,
ERROR_NUMBER_TOO_BIG = -2,
} program_error; } program_error;
int stop_errors(int err); int stop_errors(int err);

View File

@@ -1,18 +1,20 @@
#ifndef LEXER_H #ifndef LEXER_H
#define LEXER_H #define LEXER_H
#include "../libft/includes/libft.h" #include "libft.h"
#include "errors.h"
#include <stdbool.h>
typedef enum typedef enum
{ {
TOKEN_PLUS, // + TOKEN_VARIABLE, // x, y, etc.
TOKEN_MINUS, // - TOKEN_NUMBER_INT, // int
TOKEN_VARIABLE, // x, y, etc. TOKEN_NUMBER_DOUBLE, // double
TOKEN_NUMBER, // int or double TOKEN_POWER, // ^ or **
TOKEN_POWER, // ^ or ** TOKEN_SIGN, // + or -
TOKEN_MULTIPLICATION, // * TOKEN_FACTOR, // * or /
TOKEN_DIVISION, // / TOKEN_EQUAL, // =
TOKEN_END // null (end of input) TOKEN_END // null (end of input)
} token_type; } token_type;
typedef struct typedef struct
@@ -20,12 +22,12 @@ typedef struct
token_type type; token_type type;
union union
{ {
double num_value; // For NUMBER char value_char;
char var_value; // For VARIABLE (single char, e.g., 'x') int value_int;
double value_double;
}; };
} token; } token;
#define MAX_TOKENS 100 int lexerize(const char *input, token *tokens);
int lexerize(const char *input, token tokens[MAX_TOKENS]);
#endif #endif

27
headers/parser.h Normal file
View File

@@ -0,0 +1,27 @@
#ifndef PARSER_H
#define PARSER_H
#include "libft.h"
#include "lexer.h"
#include "errors.h"
// Which side of the '=' sign a parsed term belongs to.
typedef enum
{
	TERM_LEFT, // a in "a = b"
	TERM_RIGHT, // b in "a = b"
} term_type;
// One parsed term of the equation. The anonymous union mirrors the token
// union in lexer.h (value_char / value_int / value_double).
// NOTE(review): anonymous union members require C11 or GNU extensions —
// confirm the compiler flags in the Makefile allow this.
typedef struct
{
	term_type type;
	union
	{
		char value_char;   // single-character payload (e.g. a variable letter)
		int value_int;     // integer payload
		double value_double; // floating-point payload
	};
} term;
// Convert the lexer's token stream into terms[].
// Returns the number of terms produced (parser.c is currently a stub
// that always reports 1).
int parse(token *tokens, term *terms);
#endif

2
libft

Submodule libft updated: 2be81d5630...f86c2cf5cb

View File

@@ -1,19 +1,45 @@
#include "computorv1.h" #include "computorv1.h"
#include "lexer.h"
#include "errors.h" void remove_spaces(char *s)
{
char *read = s;
char *write = s;
// copy all non-space chars
while (*read)
{
if (!ft_isspace(*read))
{
*write++ = *read;
}
read++;
}
*write = '\0';
// zero the rest of the buffer
while (write != read)
{
*write++ = '\0';
}
}
int main(int ac, char **av) int main(int ac, char **av)
{ {
int i; int i;
int ret; int arg_len;
char *input;
if (ac < 2) if (ac < 2)
{ {
return 0; return 0;
} }
// tmp debug output
ft_putstr("-> received args :\n"); // debug
i = 0; i = 0;
while (i < ac) while (i < ac)
{ {
ft_putstr(" ");
ft_putnbr(i); ft_putnbr(i);
ft_putstr(" : "); ft_putstr(" : ");
ft_putstr(av[i]); ft_putstr(av[i]);
@@ -21,28 +47,73 @@ int main(int ac, char **av)
i++; i++;
} }
token tokens[MAX_TOKENS]; input = av[1];
ret = lexerize(av[1], tokens); remove_spaces(input);
if (ret <= 0) arg_len = ft_strlen(input);
{
stop_errors(ret); ft_putstr("-> input without space : "); // debug
} ft_putstr(input); // debug
ft_putchar('\n'); // debug
ft_putstr("-> arg_len : "); // debug
ft_putnbr(arg_len); // debug
ft_putchar('\n'); // debug
token tokens[arg_len];
int token_count = lexerize(input, tokens);
ft_putstr("-> token_count : "); // debug
ft_putnbr(token_count); // debug
ft_putchar('\n'); // debug
// tmp debug output // tmp debug output
ft_putchar('\n'); // debug
i = 0; i = 0;
while (tokens[i].type != TOKEN_END) while (tokens[i].type != TOKEN_END)
{ {
ft_printf("token %i :\n type : %i\n value : ", i, tokens[i].type); ft_printf("token %2i - type : ", i);
if (tokens[i].type == TOKEN_NUMBER)
if (tokens[i].type == TOKEN_VARIABLE)
ft_printf("%20s", "TOKEN_VARIABLE");
if (tokens[i].type == TOKEN_NUMBER_INT)
ft_printf("%20s", "TOKEN_NUMBER_INT");
if (tokens[i].type == TOKEN_NUMBER_DOUBLE)
ft_printf("%20s", "TOKEN_NUMBER_DOUBLE");
if (tokens[i].type == TOKEN_POWER)
ft_printf("%20s", "TOKEN_POWER");
if (tokens[i].type == TOKEN_SIGN)
ft_printf("%20s", "TOKEN_SIGN");
if (tokens[i].type == TOKEN_FACTOR)
ft_printf("%20s", "TOKEN_FACTOR");
if (tokens[i].type == TOKEN_EQUAL)
ft_printf("%20s", "TOKEN_EQUAL");
if (tokens[i].type == TOKEN_END)
ft_printf("%20s", "TOKEN_END");
ft_putstr(" - value : ");
if (tokens[i].type == TOKEN_NUMBER_INT)
{ {
ft_printf("%d\n", i, tokens[i].num_value); printf("%i\n", tokens[i].value_int);
}
else if (tokens[i].type == TOKEN_NUMBER_DOUBLE)
{
printf("%f\n", tokens[i].value_double);
} }
else else
{ {
ft_printf("%c\n", tokens[i].var_value); ft_printf("%c\n", tokens[i].value_char);
} }
i++; i++;
} }
ft_putchar('\n'); // debug
// END tmp debug output
term terms[token_count / 2];
int term_count = parse(tokens, terms);
ft_putstr("-> term_count : "); // debug
ft_putnbr(term_count); // debug
ft_putchar('\n'); // debug
return (0); return (0);
} }

View File

@@ -7,7 +7,9 @@ int stop_errors(int err)
case ERROR_UNKNOWN_TOKEN: case ERROR_UNKNOWN_TOKEN:
ft_putstr_fd("error: unknown token\n", STDERR_FILENO); ft_putstr_fd("error: unknown token\n", STDERR_FILENO);
break; break;
case ERROR_NUMBER_TOO_BIG:
ft_putstr_fd("error: number is too big\n", STDERR_FILENO);
break;
default: default:
ft_putstr_fd("unknown error\n", STDERR_FILENO); ft_putstr_fd("unknown error\n", STDERR_FILENO);
break; break;

View File

@@ -1,52 +1,224 @@
#include "lexer.h" #include "lexer.h"
#include "errors.h"
static int skip_whitespace(const char *input, int input_pos) // any single letter is a valid variable, like 'x' or 'y'
static bool token_is_variable(const char *input, int input_pos, int *token_size)
{ {
while (ft_isspace(input[input_pos])) if (ft_isalpha(input[input_pos]))
{ {
input_pos++; *token_size = 1;
return true;
} }
return input_pos; return false;
} }
static int token_is_plus(const char *input, int input_pos) // number can be int "123"
static bool token_is_number_int(const char *input, int input_pos, int *token_size)
{ {
return (input[input_pos] == '+'); int number_size;
int max_number_size;
if (!ft_isdigit(input[input_pos]))
{
return false;
}
number_size = 1;
max_number_size = 16; // max size for int
while (number_size <= max_number_size)
{
if (ft_isdigit(input[input_pos + number_size]))
{
number_size++;
}
else if (input[input_pos + number_size] == '.')
{
if (ft_isdigit(input[input_pos + number_size + 1]))
{
// number is double
return false;
}
else
break;
}
else
break;
}
if (number_size > max_number_size)
{
stop_errors(ERROR_NUMBER_TOO_BIG);
}
*token_size = number_size;
return true;
} }
int lexerize(const char *input, token tokens[MAX_TOKENS]) // number can be double "123.456"
static bool token_is_number_double(const char *input, int input_pos, int *token_size)
{ {
int token_count = 0; int number_size;
int input_pos = 0; int max_number_size;
int token_size = 0; bool has_dot;
if (!ft_isdigit(input[input_pos]))
{
return false;
}
has_dot = false;
number_size = 1;
max_number_size = 129; // max size for double double is 128 bits, + the coma
while (number_size <= max_number_size)
{
if (ft_isdigit(input[input_pos + number_size]))
{
number_size++;
}
else if (input[input_pos + number_size] == '.')
{
if (has_dot)
{
// number is not a valid double, it has 2 dots
return false;
}
if (!ft_isdigit(input[input_pos + number_size + 1]))
{
// number is not a double, it has no number after the dot
return false;
}
has_dot = true;
number_size++;
}
else
break;
}
if (number_size > max_number_size)
{
stop_errors(ERROR_NUMBER_TOO_BIG);
}
*token_size = number_size;
return true;
}
// power can be '^' or "**"
// Checks whether a power operator starts at input[input_pos]; on success
// stores the operator length (1 or 2) in *token_size and returns true.
// Direct char comparisons replace the previous ft_memcmp call: a 2-byte
// memcmp is overkill here, and this form makes the bound explicit — we
// read at most input[input_pos + 1], which is in bounds because the
// caller (lexerize) only calls us while input[input_pos] != '\0'.
static bool token_is_power(const char *input, int input_pos, int *token_size)
{
	if (input[input_pos] == '^')
	{
		*token_size = 1;
		return true;
	}
	if (input[input_pos] == '*' && input[input_pos + 1] == '*')
	{
		*token_size = 2;
		return true;
	}
	return false;
}
// sign can be '+' or '-'
// Returns true and sets *token_size to 1 when a sign character sits at
// input[input_pos]; leaves *token_size untouched otherwise.
static bool token_is_sign(const char *input, int input_pos, int *token_size)
{
	const char c = input[input_pos];

	if (c != '+' && c != '-')
		return false;
	*token_size = 1;
	return true;
}
// factor can be '*' or '/' or ':'
// Returns true and sets *token_size to 1 when a factor operator sits at
// input[input_pos]; leaves *token_size untouched otherwise. Note that
// "**" is claimed by token_is_power first in the lexer loop.
static bool token_is_factor(const char *input, int input_pos, int *token_size)
{
	switch (input[input_pos])
	{
		case '*':
		case '/':
		case ':':
			*token_size = 1;
			return true;
		default:
			return false;
	}
}
// detect a single '='
// Returns true and sets *token_size to 1 when '=' sits at
// input[input_pos]; leaves *token_size untouched otherwise.
static bool token_is_equal(const char *input, int input_pos, int *token_size)
{
	if (input[input_pos] != '=')
		return false;
	*token_size = 1;
	return true;
}
/**
* LEXER
*/
int lexerize(const char *input, token *tokens)
{
int token_count;
int input_pos;
int token_size;
token_count = 0;
input_pos = 0;
while (input[input_pos]) while (input[input_pos])
{ {
input_pos = skip_whitespace(input, input_pos); token_size = 0;
if (input[input_pos] == '\0') if (input[input_pos] == '\0')
{ {
break; break;
} }
token_size = token_is_plus(input, input_pos); if (token_is_variable(input, input_pos, &token_size))
if (token_size)
{ {
tokens[token_count].type = TOKEN_PLUS; tokens[token_count].type = TOKEN_VARIABLE;
tokens[token_count].var_value = '+'; tokens[token_count].value_char = 'x';
}
else if (token_is_number_int(input, input_pos, &token_size))
{
tokens[token_count].type = TOKEN_NUMBER_INT;
tokens[token_count].value_int = ft_atoi(&input[input_pos]);
}
else if (token_is_number_double(input, input_pos, &token_size))
{
tokens[token_count].type = TOKEN_NUMBER_DOUBLE;
tokens[token_count].value_double = ft_atof(&input[input_pos]);
}
else if (token_is_power(input, input_pos, &token_size))
{
tokens[token_count].type = TOKEN_POWER;
tokens[token_count].value_char = '^';
}
else if (token_is_sign(input, input_pos, &token_size))
{
tokens[token_count].type = TOKEN_SIGN;
tokens[token_count].value_char = input[input_pos];
}
else if (token_is_factor(input, input_pos, &token_size))
{
tokens[token_count].type = TOKEN_FACTOR;
tokens[token_count].value_char = input[input_pos];
}
else if (token_is_equal(input, input_pos, &token_size))
{
tokens[token_count].type = TOKEN_EQUAL;
tokens[token_count].value_char = '=';
}
else
{
// tmp
token_size = 1;
// stop_errors(ERROR_UNKNOWN_TOKEN);
} }
token_count++;
if (token_size == 0) if (token_size == 0)
{ {
stop_errors(ERROR_UNKNOWN_TOKEN); stop_errors(ERROR_UNKNOWN_TOKEN);
} }
token_count++;
input_pos += token_size; input_pos += token_size;
} }
tokens[token_count].type = TOKEN_END; tokens[token_count].type = TOKEN_END;
tokens[token_count].var_value = '\0'; tokens[token_count].value_char = '\0';
return 1; return token_count;
} }

10
src/parser.c Normal file
View File

@@ -0,0 +1,10 @@
#include "parser.h"

// Placeholder parser stage: tags the first term slot as TERM_LEFT (0) and
// reports a single term. Real token -> term conversion is still TODO.
int parse(token *tokens, term *terms)
{
	if (!tokens)
		return (1);
	terms[0].type = 0;
	return (1);
}