wip parsing and error

This commit is contained in:
hugogogo
2026-05-03 00:20:39 +02:00
parent c24461cb33
commit 42cfdf9734
6 changed files with 134 additions and 285 deletions

View File

@@ -12,6 +12,6 @@ typedef enum
ERROR_SENTINEL,
} program_error;
int stop_errors(program_error err, const char *format, ...);
int stop_errors(program_error err, token *tokens, char *input, const char *format, ...);
#endif

View File

@@ -5,180 +5,6 @@
#include "errors.h"
#include <stdbool.h>
/**
* PROPOSITION 1
*/
// typedef enum
// {
// TOKEN_VARIABLE, // x, y, etc.
// TOKEN_NUMBER, // int or double
// TOKEN_POWER, // ^ or **
// TOKEN_SIGN, // + or -
// TOKEN_FACTOR, // * or / or :
// TOKEN_EQUAL, // =
// TOKEN_END // null (end of input)
// } token_type;
//
// typedef enum
// {
// TOKEN_NUMBER_INT, // int
// TOKEN_NUMBER_DOUBLE, // double
// TOKEN_SIGN_ADD, // +
// TOKEN_SIGN_MINUS, // -
// TOKEN_FACTOR_MULTIPLICATION, // *
// TOKEN_FACTOR_DIVISION, // / or :
// } token_subtype;
//
// typedef struct
// {
// token_type type;
// token_subtype subtype;
// union
// {
// char value_char;
// double value_double;
// };
// } token;
/**
* PROPOSITION 2
*/
// // TYPES and SUBTYPES
// typedef enum
// {
// TOKEN_VARIABLE, // x, y, etc.
// TOKEN_NUMBER, // int or double
// TOKEN_POWER, // ^ or **
// TOKEN_SIGN, // + or -
// TOKEN_FACTOR, // * or / or :
// TOKEN_EQUAL, // =
// TOKEN_END // null (end of input)
// } token_type;
//
// typedef enum
// {
// NUMBER_INT,
// NUMBER_DOUBLE
// } number_subtype;
//
// typedef enum
// {
// SIGN_ADD, // +
// SIGN_MINUS // -
// } sign_subtype;
//
// typedef enum
// {
// FACTOR_MULTIPLICATION, // *
// FACTOR_DIVISION, // / or :
// } factor_subtype;
//
// // DATA
//
// typedef struct
// {
// char value; // e.g., 'x', 'y'
// } token_variable;
//
// typedef struct
// {
// number_subtype subtype;
// double value;
// } token_number;
//
// typedef struct
// {
// sign_subtype subtype;
// char value;
// } token_sign;
//
// typedef struct
// {
// factor_subtype subtype;
// char value;
// } token_factor;
//
// typedef struct
// {
// char value;
// } token_power;
//
// typedef struct
// {
// char value;
// } token_equal;
//
// typedef struct
// {
// char value;
// } token_end;
//
// // TOKEN
//
// typedef union
// {
// token_variable variable; // value
// token_number number; // subtype [INT, DOUBLE], value
// token_sign sign; // subtype [PLUS, MINUS], value
// token_factor factor; // subtype [MULT, DIV], value
// token_power power; // value
// token_equal equal; // value
// token_end end; // value
// } token_data;
//
// typedef struct
// {
// token_type type;
// token_data data;
// } token;
/**
* PROPOSITION 3
*/
// typedef enum
// {
// TOKEN_VARIABLE, // x, y, etc.
// TOKEN_NUMBER, // int or double
// TOKEN_POWER, // ^ or **
// TOKEN_SIGN, // + or -
// TOKEN_FACTOR, // * or / or :
// TOKEN_EQUAL, // =
// TOKEN_END // null (end of input)
// } token_type;
//
// typedef enum
// {
// TOKEN_NO_SUBTYPE,
// // NUMBER
// TOKEN_NUMBER_INT,
// TOKEN_NUMBER_DOUBLE,
// // SIGN
// TOKEN_SIGN_PLUS,
// TOKEN_SIGN_MINUS,
// // FACTOR
// TOKEN_FACTOR_MULTIPLICATION,
// TOKEN_FACTOR_DIVISION,
// } token_subtype;
//
// typedef struct
// {
// token_type type;
// token_subtype subtype;
// union
// {
// char value_char;
// double value_double;
// };
// } token;
/**
* PROPOSITION 4
*/
typedef enum
{
TOKEN_VARIABLE, // x, y, etc.

View File

@@ -1,11 +1,6 @@
#ifndef PARSER_H
#define PARSER_H
#include "libft.h"
#include "lexer.h"
#include "errors.h"
#include <stdbool.h>
typedef enum
{
TERM_LEFT, // a in "a = b"
@@ -25,7 +20,7 @@ typedef struct
term_position position;
term_sign sign;
double coefficient;
int exponent;
double exponent;
} term;
int parse(token *tokens, term *terms, int terms_count_max);

View File

@@ -63,64 +63,19 @@ int main(int ac, char **av)
input = av[1];
remove_spaces(input);
arg_len = ft_strlen(input);
ft_putstr("-> input without space : "); // debug
ft_putstr(input); // debug
ft_putchar('\n'); // debug
ft_putstr("-> arg_len : "); // debug
ft_putnbr(arg_len); // debug
ft_putchar('\n'); // debug
arg_len = ft_strlen(input) + 1; // +1 for last END token
token tokens[arg_len];
// for safety, mark the last slot as END (the real END token will usually sit at a lower index)
tokens[arg_len - 1].type = TOKEN_END;
tokens[arg_len - 1].tag = TOKEN_NO_TAG;
tokens[arg_len - 1].value_char = '\0';
// lexerize
int tokens_count = lexerize(input, tokens);
ft_putstr("-> tokens_count : "); // debug
ft_putnbr(tokens_count); // debug
ft_putchar('\n'); // debug
// tmp debug output
ft_putchar('\n'); // debug
i = 0;
while (tokens[i].type != TOKEN_END)
if (tokens_count == 0)
{
ft_printf("token %2i - type : ", i);
if (tokens[i].type == TOKEN_VARIABLE)
ft_printf("%20s", "TOKEN_VARIABLE");
else if (tokens[i].type == TOKEN_NUMBER_INT)
ft_printf("%20s", "TOKEN_NUMBER_INT");
else if (tokens[i].type == TOKEN_NUMBER_DOUBLE)
ft_printf("%20s", "TOKEN_NUMBER_DOUBLE");
else if (tokens[i].type == TOKEN_POWER)
ft_printf("%20s", "TOKEN_POWER");
else if (tokens[i].type == TOKEN_SIGN_PLUS)
ft_printf("%20s", "TOKEN_SIGN_PLUS");
else if (tokens[i].type == TOKEN_SIGN_MINUS)
ft_printf("%20s", "TOKEN_SIGN_MINUS");
else if (tokens[i].type == TOKEN_FACTOR_MULT)
ft_printf("%20s", "TOKEN_FACTOR_MULT");
else if (tokens[i].type == TOKEN_FACTOR_DIV)
ft_printf("%20s", "TOKEN_FACTOR_DIV");
else if (tokens[i].type == TOKEN_EQUAL)
ft_printf("%20s", "TOKEN_EQUAL");
else if (tokens[i].type == TOKEN_END)
ft_printf("%20s", "TOKEN_END");
ft_putstr(" - value : ");
if (tokens[i].tag == TOKEN_NUMBER)
{
printf("%g\n", tokens[i].value_double);
stop_errors(ERROR_BASE, tokens, input, "test error");
}
else
{
ft_printf("%c\n", tokens[i].value_char);
}
i++;
}
ft_putchar('\n'); // debug
// END tmp debug output
terms_count_prediction = count_any_of(input, "-+=") + 2; // +1 for first term that can have no leading '+', +1 for last term == NULL

View File

@@ -1,13 +1,16 @@
#include "errors.h"
#include "libft.h"
#include "lexer.h"
#include <stdarg.h>
int stop_errors(program_error err, const char *details, ...)
int stop_errors(program_error err, token *tokens, char *input, const char *details, ...)
{
int i;
// the base error message
const char *msg = "error: error type is out of range";
// Map error codes to messages
// map error codes to messages
const char *error_messages[] = {
[ERROR_BASE] = "error: undefined error, details :",
[ERROR_UNKNOWN_TOKEN] = "error: unknown token, details :",
@@ -18,16 +21,58 @@ int stop_errors(program_error err, const char *details, ...)
// ⚠️ Add new error messages here when adding new error codes!
};
// Override msg if err is in the error_messages array
// override msg if err is in the error_messages array
if (err >= ERROR_BASE && err < ERROR_SENTINEL)
{
msg = error_messages[err];
}
// Print the base message
// print context
ft_dprintf(STDERR_FILENO, "input : %s\n", input);
i = 0;
while (tokens[i].type != TOKEN_END)
{
ft_printf("token %2i - type : ", i);
if (tokens[i].type == TOKEN_VARIABLE)
ft_printf("%20s", "TOKEN_VARIABLE");
else if (tokens[i].type == TOKEN_NUMBER_INT)
ft_printf("%20s", "TOKEN_NUMBER_INT");
else if (tokens[i].type == TOKEN_NUMBER_DOUBLE)
ft_printf("%20s", "TOKEN_NUMBER_DOUBLE");
else if (tokens[i].type == TOKEN_POWER)
ft_printf("%20s", "TOKEN_POWER");
else if (tokens[i].type == TOKEN_SIGN_PLUS)
ft_printf("%20s", "TOKEN_SIGN_PLUS");
else if (tokens[i].type == TOKEN_SIGN_MINUS)
ft_printf("%20s", "TOKEN_SIGN_MINUS");
else if (tokens[i].type == TOKEN_FACTOR_MULT)
ft_printf("%20s", "TOKEN_FACTOR_MULT");
else if (tokens[i].type == TOKEN_FACTOR_DIV)
ft_printf("%20s", "TOKEN_FACTOR_DIV");
else if (tokens[i].type == TOKEN_EQUAL)
ft_printf("%20s", "TOKEN_EQUAL");
else if (tokens[i].type == TOKEN_END)
ft_printf("%20s", "TOKEN_END");
ft_putstr(" - value : ");
if (tokens[i].tag == TOKEN_NUMBER)
{
printf("%g\n", tokens[i].value_double);
}
else
{
ft_printf("%c\n", tokens[i].value_char);
}
i++;
}
ft_putchar('\n');
// print the base message
ft_dprintf(STDERR_FILENO, "%s (%i) - ", msg, err);
// Print the formatted details directly
// print the formatted details
va_list args;
va_start(args, details);
ft_vdprintf(STDERR_FILENO, details, args);

View File

@@ -1,4 +1,8 @@
#include "parser.h"
#include "libft.h"
#include "lexer.h"
#include "errors.h"
#include <stdbool.h>
/**
TOKEN_VARIABLE, // x, y, etc.
@@ -33,15 +37,15 @@ static term_sign get_sign(token *tokens, int i, int *token_count)
// forbidden tokens
if (tokens[i].type == TOKEN_POWER)
{
stop_errors(ERROR_TOKEN_POSITION, "at sign place, we should not have a token 'power' : %c", tokens[i].value_char);
stop_errors(ERROR_TOKEN_POSITION, "at sign place, we should not have a token 'power' : '%c' (token number %i)", tokens[i].value_char, i);
}
if (tokens[i].tag == TOKEN_FACTOR)
{
stop_errors(ERROR_TOKEN_POSITION, "at sign place, we should not have a token 'factor' : %c", tokens[i].value_char);
stop_errors(ERROR_TOKEN_POSITION, "at sign place, we should not have a token 'factor' : '%c' (token number %i)", tokens[i].value_char, i);
}
if (tokens[i].type == TOKEN_EQUAL)
{
stop_errors(ERROR_TOKEN_POSITION, "at sign place, we should not have a token 'equal' : %c", tokens[i].value_char);
stop_errors(ERROR_TOKEN_POSITION, "at sign place, we should not have a token 'equal' : '%c' (token number %i)", tokens[i].value_char, i);
}
// sign
@@ -61,7 +65,7 @@ static term_sign get_sign(token *tokens, int i, int *token_count)
return '+';
}
return stop_errors(ERROR_TOKEN_POSITION, "at begining of term, we should have a token 'sign' : %c", tokens[i].value_char);
return stop_errors(ERROR_TOKEN_POSITION, "at begining of term, we should have a token 'sign', not : '%c' (token number %i)", tokens[i].value_char, i);
}
static double get_coefficient(token *tokens, int i, int *token_count)
@@ -73,19 +77,19 @@ static double get_coefficient(token *tokens, int i, int *token_count)
// forbidden tokens
if (tokens[i].type == TOKEN_POWER)
{
stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'power' : %c", tokens[i].value_char);
stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'power' : '%c' (token number %i)", tokens[i].value_char, i);
}
if (tokens[i].tag == TOKEN_FACTOR)
{
stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'factor' : %c", tokens[i].value_char);
stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'factor' : '%c' (token number %i)", tokens[i].value_char, i);
}
if (tokens[i].type == TOKEN_EQUAL)
{
stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'equal' : %c", tokens[i].value_char);
stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'equal' : '%c' (token number %i)", tokens[i].value_char, i);
}
if (tokens[i].tag == TOKEN_SIGN)
{
stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'sign' : %c", tokens[i].value_char);
stop_errors(ERROR_TOKEN_POSITION, "at coefficient place, we should not have a token 'sign' : '%c' (token number %i)", tokens[i].value_char, i);
}
// if not coefficient token
@@ -127,59 +131,65 @@ static double get_coefficient(token *tokens, int i, int *token_count)
return coefficient;
}
/**
 * Reads the exponent part of a term, starting at tokens[i].
 *
 * Accepted token sequences:
 *   VARIABLE POWER NUMBER_INT               (e.g. "x^2")
 *   FACTOR_MULT VARIABLE POWER NUMBER_INT   (e.g. "*x^2")
 *
 * @param tokens       token array being parsed
 * @param i            index of the first token of the exponent part
 *                     (passed by value: advancing it here is local only)
 * @param token_count  incremented by the number of tokens consumed
 * @return             the value_double stored in the NUMBER_INT token that
 *                     follows the power sign
 *
 * NOTE(review): whether stop_errors() returns is not visible from here; the
 * code below keeps going after each call exactly as the original did.
 * NOTE(review): the stop_errors prototype shown in this commit takes
 * (err, tokens, input, format, ...), while these calls still use the earlier
 * (err, format, ...) form - confirm and update once `input` is reachable here.
 */
static double get_exponent(token *tokens, int i, int *token_count)
{
	// first reach the variable: either 'x' or '*x'
	if (tokens[i].type == TOKEN_VARIABLE)
	{
		i++;
		(*token_count)++;
	}
	else if (tokens[i].type == TOKEN_FACTOR_MULT)
	{
		i++;
		if (tokens[i].type == TOKEN_VARIABLE)
		{
			i++;
			(*token_count) += 2;
		}
		else
		{
			stop_errors(ERROR_TOKEN_POSITION, "at exponent place, after a '*' we should have a 'var', but instead got : '%c' (token number %i)", tokens[i].value_char, i);
		}
	}
	else
	{
		stop_errors(ERROR_TOKEN_POSITION, "at exponent place, the first tokens should be 'x' or '*x', but instead got : '%c' (token number %i)", tokens[i].value_char, i);
	}

	// then the power sign '^'
	if (tokens[i].type == TOKEN_POWER)
	{
		i++;
		(*token_count)++;
	}
	else
	{
		stop_errors(ERROR_TOKEN_POSITION, "at exponent place, after variable we should have '^', but instead got : '%c' (token number %i)", tokens[i].value_char, i);
	}

	// then the exponent itself
	if (tokens[i].type == TOKEN_NUMBER_INT)
	{
		(*token_count)++;
	}
	else
	{
		stop_errors(ERROR_TOKEN_POSITION, "at exponent place, we should have an int, but instead got : '%c' (token number %i)", tokens[i].value_char, i);
	}

	// i is deliberately NOT advanced past the number: the previous version
	// incremented it first and therefore returned the value of the token
	// that comes after the exponent
	return tokens[i].value_double;
}
static void check_variables(token *tokens)
{
int i;
int terms_count;
int token_count;
term_position term_position;
char var;
terms_count = 0;
token_count = 0;
i = 0;
term_position = TERM_LEFT;
var = 0;
while (tokens[i].type != TOKEN_END && terms_count < terms_count_max)
while (tokens[i].type != TOKEN_END)
{
// variable -> all variables must be the same
if (tokens[i].type == TOKEN_VARIABLE)
@@ -190,10 +200,28 @@ int parse(token *tokens, term *terms, int terms_count_max)
}
else if (var != tokens[i].value_char)
{
stop_errors(ERROR_VAR_DIFF, "old var : '%c' - new var : '%c'", var, tokens[i].value_char);
stop_errors(ERROR_VAR_DIFF, "old var : '%c' - new var : '%c' (token number %i)", var, tokens[i].value_char, i);
}
}
i++;
}
}
int parse(token *tokens, term *terms, int terms_count_max)
{
int i;
int terms_count;
int token_count;
term_position term_position;
check_variables(tokens);
terms_count = 0;
token_count = 0;
i = 0;
term_position = TERM_LEFT;
while (tokens[i].type != TOKEN_END && terms_count < terms_count_max)
{
// equal
if (tokens[i].type == TOKEN_EQUAL)
{