From c0ef57499addcfb4fec41b2c990d52badfd1e785 Mon Sep 17 00:00:00 2001 From: LuckyLaszlo Date: Sat, 30 Oct 2021 22:47:02 +0200 Subject: [PATCH] lexing refactoring --- Makefile | 4 +- headers/minishell_macro.h | 8 +- srcs/lexing.c | 196 ---------------------------------- srcs/lexing/check_operators.c | 65 +++++++++++ srcs/lexing/fill_token.c | 77 +++++++++++++ srcs/lexing/lexing.c | 76 +++++++++++++ 6 files changed, 227 insertions(+), 199 deletions(-) delete mode 100644 srcs/lexing.c create mode 100644 srcs/lexing/check_operators.c create mode 100644 srcs/lexing/fill_token.c create mode 100644 srcs/lexing/lexing.c diff --git a/Makefile b/Makefile index cd67765..0f7d936 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ CC = clang CFLAGS = -Wall -Wextra $(INCLUDES) -g # add -Werror, del -g VPATH = $(DIR_SRCS) -DIR_SRCS = srcs srcs/builtins srcs/parsing +DIR_SRCS = srcs srcs/builtins srcs/lexing srcs/parsing INCLUDES = -I$(HEADERS_D) -I$(LIBFT_D) @@ -24,7 +24,7 @@ LIBFT = $(LIBFT_D)/libft.a SRCS = main.c init.c free.c generic.c \ signals.c terminal.c \ shell_loop.c shell_script.c \ - lexing.c \ + lexing.c fill_token.c check_operators.c \ parsing.c \ valid_syntax.c valid_pipeline.c valid_command.c valid_io_redirect.c \ env.c exit.c echo.c diff --git a/headers/minishell_macro.h b/headers/minishell_macro.h index 9fbda6f..7ff7858 100644 --- a/headers/minishell_macro.h +++ b/headers/minishell_macro.h @@ -6,7 +6,7 @@ /* By: lperrey +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2021/10/08 02:35:55 by lperrey #+# #+# */ -/* Updated: 2021/10/08 03:01:43 by lperrey ### ########.fr */ +/* Updated: 2021/10/30 22:32:48 by lperrey ### ########.fr */ /* */ /* ************************************************************************** */ @@ -18,4 +18,10 @@ # define PROMPT_CHEVRON "> " # define PROMPT_EURO "\001€\002 \001\b\002" +enum e_lexer_return +{ + CONTINUE_TOKEN = 1, + DELIMITE_TOKEN +}; + #endif diff --git a/srcs/lexing.c b/srcs/lexing.c deleted file mode 100644 index 5036a54..0000000 --- a/srcs/lexing.c +++ /dev/null @@ -1,196 +0,0 @@ -/* ************************************************************************** */ -/* */ -/* ::: :::::::: */ -/* lexing.c :+: :+: :+: */ -/* +:+ +:+ +:+ */ -/* By: lperrey +#+ +:+ +#+ */ -/* +#+#+#+#+#+ +#+ */ -/* Created: 2021/10/19 08:38:55 by lperrey #+# #+# */ -/* Updated: 2021/10/30 14:19:42 by lperrey ### ########.fr */ -/* */ -/* ************************************************************************** */ - -#include "minishell.h" -static t_token *alloc_token(size_t content_len); -static int tokenize_input(t_token *t, char *input, size_t input_len); -static int fill_token(t_token *t, char *input, int *i, int *t_i); -static int check_operators(t_token *t, char *input, int *i, int *t_i); - -enum e_in_quote_state -{ - NOT_IN = 0, - IN_QUOTES = '\'', - IN_DQUOTES = '\"' -}; - -enum e_fill_token_return -{ - CONTINUE_TOKEN = 1, - DELIMITE_TOKEN -}; - -t_token *input_to_tokens(char *input) -{ - t_token *t_head; - t_token *t_tmp; - size_t input_len; - - input_len = ft_strlen(input); - t_head = alloc_token(input_len); - if (!t_head) - return (ft_retp_perror(NULL, "alloc_token() error")); - if (!tokenize_input(t_head, input, input_len)) - return (ft_lstclear((t_list **)&t_head, free)); - t_tmp = (t_token *)ft_lstbeforelast((t_list *)t_head); - if (t_tmp && !t_tmp->next->id) - { - ft_lstdelone((t_list *)t_tmp->next, free); - t_tmp->next = NULL; - } - return (t_head); -} - -static int tokenize_input(t_token *t, char *input, size_t input_len) -{ - int i; - int t_i; - - i = 0; - t_i = 0; - while (input[i]) - { - if (fill_token(t, input, &i, &t_i) == DELIMITE_TOKEN && input[i] && t_i) - { - if (!t->id) - t->id = T_WORD; - t->next = alloc_token(input_len - i); - if (!t->next) - return (ft_reti_perror(0, "alloc_token() error")); - t = t->next; - t_i = 0; - } - } - if (!t->id && t_i) - t->id = T_WORD; - return (1); -} - -static int fill_token(t_token *t, char *input, int *i, int *t_i) -{ - static int in_quotes = 0; - - // operators - if (!in_quotes) - { - if (check_operators(t, input, i, t_i) == DELIMITE_TOKEN) - return (DELIMITE_TOKEN); - } - // quoting - if (input[*i] == '\'' && in_quotes != IN_DQUOTES) - { - t->content[(*t_i)++] = input[(*i)++]; - if (in_quotes == IN_QUOTES) - in_quotes = 0; - else if (ft_strchr(&input[*i], '\'')) // if closed quotes - in_quotes = IN_QUOTES; - return (CONTINUE_TOKEN); - } - else if (input[*i] == '\"' && in_quotes != IN_QUOTES) - { - t->content[(*t_i)++] = input[(*i)++]; - if (in_quotes == IN_DQUOTES) - in_quotes = 0; - else if (ft_strchr(&input[*i], '\"')) // if closed dquotes - in_quotes = IN_DQUOTES; - return (CONTINUE_TOKEN); - } - // blanks - if (!in_quotes && (input[*i] == ' ' || input[*i] == '\t')) - { - while (input[*i] == ' ' || input[*i] == '\t') - (*i)++; - return (DELIMITE_TOKEN); - } - else - t->content[(*t_i)++] = input[(*i)++]; - return (CONTINUE_TOKEN); -} - -static int check_operators(t_token *t, char *input, int *i, int *t_i) -{ - if (*t_i != 0 && (input[*i] == '|' || input[*i] == '<' || input[*i] == '>')) - return (DELIMITE_TOKEN); - if (input[*i] == '|') - { - t->content[(*t_i)++] = input[(*i)++]; - t->id = T_PIPE; - return (DELIMITE_TOKEN); - } - else if (input[*i] == '<') - { - t->content[(*t_i)++] = input[(*i)++]; - t->id = T_LESS; - if (input[*i] == '<') - { - t->content[(*t_i)++] = input[(*i)++]; - t->id = T_DLESS; - } - return (DELIMITE_TOKEN); - } - else if (input[*i] == '>') - { - t->content[(*t_i)++] = input[(*i)++]; - t->id = T_GREAT; - if (input[*i] == '>') - { - t->content[(*t_i)++] = input[(*i)++]; - t->id = T_DGREAT; - } - return (DELIMITE_TOKEN); - } - return (CONTINUE_TOKEN); -} - -static t_token *alloc_token(size_t content_len) -{ - t_token *token; - - token = ft_calloc(1, sizeof *token); - if (!token) - return (NULL); - token->content = ft_calloc(content_len + 1, 1); - if (!token->content) - return (ft_retp_free(NULL, token, free)); - return (token); -} - -/* -https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_03 -1 - OK -2 - OK -3 - OK -4 - OK -5 - OK / SEMI-OSEF -6 - OK -7 - OK -8 - OK -9 - OSEF -10 - OK - -*/ - -// Doublon avec ft_lstclear() -/* void *free_tokens(t_token *t) -{ - void *tmp; - - while (t) - { - if (t->content) - free (t->content); - tmp = t; - t = t->next; - free(tmp); - } - return (NULL); -} */ diff --git a/srcs/lexing/check_operators.c b/srcs/lexing/check_operators.c new file mode 100644 index 0000000..1a73014 --- /dev/null +++ b/srcs/lexing/check_operators.c @@ -0,0 +1,65 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* check_operators.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: lperrey +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2021/10/19 08:38:55 by lperrey #+# #+# */ +/* Updated: 2021/10/30 22:37:08 by lperrey ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "minishell.h" + +static int check_redirection(t_token *t, char *input, int *i, int *t_i); +static int check_pipe(t_token *t, char *input, int *i, int *t_i); + +int check_operators(t_token *t, char *input, int *i, int *t_i) +{ + if (*t_i != 0 && (input[*i] == '|' || input[*i] == '<' || input[*i] == '>')) + return (DELIMITE_TOKEN); + else if (check_pipe(t, input, i, t_i)) + return (DELIMITE_TOKEN); + else if (check_redirection(t, input, i, t_i)) + return (DELIMITE_TOKEN); + return (CONTINUE_TOKEN); +} + +static int check_pipe(t_token *t, char *input, int *i, int *t_i) +{ + if (input[*i] == '|') + { + t->content[(*t_i)++] = input[(*i)++]; + t->id = T_PIPE; + return (1); + } + return (0); +} + +static int check_redirection(t_token *t, char *input, int *i, int *t_i) +{ + if (input[*i] == '<') + { + t->content[(*t_i)++] = input[(*i)++]; + t->id = T_LESS; + if (input[*i] == '<') + { + t->content[(*t_i)++] = input[(*i)++]; + t->id = T_DLESS; + } + return (1); + } + else if (input[*i] == '>') + { + t->content[(*t_i)++] = input[(*i)++]; + t->id = T_GREAT; + if (input[*i] == '>') + { + t->content[(*t_i)++] = input[(*i)++]; + t->id = T_DGREAT; + } + return (1); + } + return (0); +} diff --git a/srcs/lexing/fill_token.c b/srcs/lexing/fill_token.c new file mode 100644 index 0000000..8431139 --- /dev/null +++ b/srcs/lexing/fill_token.c @@ -0,0 +1,77 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* fill_token.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: lperrey +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2021/10/19 08:38:55 by lperrey #+# #+# */ +/* Updated: 2021/10/30 22:35:01 by lperrey ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "minishell.h" +int check_operators(t_token *t, char *input, int *i, int *t_i); + +enum e_in_quote_state +{ + NOT_IN = 0, + IN_QUOTES = '\'', + IN_DQUOTES = '\"' +}; + +int fill_token(t_token *t, char *input, int *i, int *t_i) +{ + static int in_quotes = 0; + + // operators + if (!in_quotes) + { + if (check_operators(t, input, i, t_i) == DELIMITE_TOKEN) + return (DELIMITE_TOKEN); + } + // quoting + if (input[*i] == '\'' && in_quotes != IN_DQUOTES) + { + t->content[(*t_i)++] = input[(*i)++]; + if (in_quotes == IN_QUOTES) + in_quotes = 0; + else if (ft_strchr(&input[*i], '\'')) // if closed quotes + in_quotes = IN_QUOTES; + return (CONTINUE_TOKEN); + } + else if (input[*i] == '\"' && in_quotes != IN_QUOTES) + { + t->content[(*t_i)++] = input[(*i)++]; + if (in_quotes == IN_DQUOTES) + in_quotes = 0; + else if (ft_strchr(&input[*i], '\"')) // if closed dquotes + in_quotes = IN_DQUOTES; + return (CONTINUE_TOKEN); + } + // blanks + if (!in_quotes && (input[*i] == ' ' || input[*i] == '\t')) + { + while (input[*i] == ' ' || input[*i] == '\t') + (*i)++; + return (DELIMITE_TOKEN); + } + else + t->content[(*t_i)++] = input[(*i)++]; + return (CONTINUE_TOKEN); +} + +/* +https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_03 +1 - OK +2 - OK +3 - OK +4 - OK +5 - OK / SEMI-OSEF +6 - OK +7 - OK +8 - OK +9 - OSEF +10 - OK + +*/ diff --git a/srcs/lexing/lexing.c b/srcs/lexing/lexing.c new file mode 100644 index 0000000..e089544 --- /dev/null +++ b/srcs/lexing/lexing.c @@ -0,0 +1,76 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* lexing.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: lperrey +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2021/10/19 08:38:55 by lperrey #+# #+# */ +/* Updated: 2021/10/30 22:37:48 by lperrey ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "minishell.h" +int fill_token(t_token *t, char *input, int *i, int *t_i); + +static t_token *alloc_token(size_t content_len); +static int tokenize_input(t_token *t, char *input, size_t input_len); + +t_token *input_to_tokens(char *input) +{ + t_token *t_head; + t_token *t_tmp; + size_t input_len; + + input_len = ft_strlen(input); + t_head = alloc_token(input_len); + if (!t_head) + return (ft_retp_perror(NULL, "alloc_token() error")); + if (!tokenize_input(t_head, input, input_len)) + return (ft_lstclear((t_list **)&t_head, free)); + t_tmp = (t_token *)ft_lstbeforelast((t_list *)t_head); + if (t_tmp && !t_tmp->next->id) + { + ft_lstdelone((t_list *)t_tmp->next, free); + t_tmp->next = NULL; + } + return (t_head); +} + +static t_token *alloc_token(size_t content_len) +{ + t_token *token; + + token = ft_calloc(1, sizeof *token); + if (!token) + return (NULL); + token->content = ft_calloc(content_len + 1, 1); + if (!token->content) + return (ft_retp_free(NULL, token, free)); + return (token); +} + +static int tokenize_input(t_token *t, char *input, size_t input_len) +{ + int i; + int t_i; + + i = 0; + t_i = 0; + while (input[i]) + { + if (fill_token(t, input, &i, &t_i) == DELIMITE_TOKEN && input[i] && t_i) + { + if (!t->id) + t->id = T_WORD; + t->next = alloc_token(input_len - i); + if (!t->next) + return (ft_reti_perror(0, "alloc_token() error")); + t = t->next; + t_i = 0; + } + } + if (!t->id && t_i) + t->id = T_WORD; + return (1); +}