From 1054f3d6ff3ceb0f2b061232f66eb007cce5427e Mon Sep 17 00:00:00 2001 From: LuckyLaszlo Date: Tue, 19 Oct 2021 20:25:41 +0200 Subject: [PATCH] lexer (need refactoring) + littles miscs --- Makefile | 2 +- headers/minishell_prototypes.h | 6 +- headers/minishell_structs.h | 26 ++++- srcs/builtins/exit.c | 5 +- srcs/generic.c | 11 +- srcs/init.c | 6 +- srcs/lexing.c | 189 +++++++++++++++++++++++++++++++++ srcs/main.c | 11 +- 8 files changed, 244 insertions(+), 12 deletions(-) create mode 100644 srcs/lexing.c diff --git a/Makefile b/Makefile index 3068941..3e44fba 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ LIBFT_D = ./libft LIBFT = $(LIBFT_D)/libft.a SRCS = main.c init.c free.c generic.c \ - env.c exit.c + env.c exit.c lexing.c DIR_OBJS = builds OBJS = $(SRCS:%.c=$(DIR_OBJS)/%.o) diff --git a/headers/minishell_prototypes.h b/headers/minishell_prototypes.h index 9ac275f..27abb4e 100644 --- a/headers/minishell_prototypes.h +++ b/headers/minishell_prototypes.h @@ -6,7 +6,7 @@ /* By: lperrey +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2021/10/08 02:59:58 by lperrey #+# #+# */ -/* Updated: 2021/10/10 23:55:14 by lperrey ### ########.fr */ +/* Updated: 2021/10/19 19:55:08 by lperrey ### ########.fr */ /* */ /* ************************************************************************** */ @@ -16,6 +16,9 @@ // Init int init(t_all *c, char *envp[]); +// Lexer +t_token *input_to_tokens(char *input); + // Builtins int builtin_env(int argc, char *argv[], t_all *c); int builtin_exit(int argc, char *argv[], t_all *c); @@ -27,5 +30,6 @@ int free_exit(t_all *c, int exit_status); char *ft_strjoinfree(char *s1, char *s2); char *ft_strjoinfree_s1(char *s1, const char *s2); char *ft_strjoinfree_s2(const char *s1, char *s2); +void ft_lstprint(t_list *lst, int fd); #endif diff --git a/headers/minishell_structs.h b/headers/minishell_structs.h index 6bb2bc6..6b4f46a 100644 --- a/headers/minishell_structs.h +++ b/headers/minishell_structs.h @@ -6,18 +6,42 @@ /* By: lperrey +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2021/10/08 02:35:52 by lperrey #+# #+# */ -/* Updated: 2021/10/10 05:39:09 by lperrey ### ########.fr */ +/* Updated: 2021/10/19 20:20:35 by lperrey ### ########.fr */ /* */ /* ************************************************************************** */ #ifndef MINISHELL_STRUCTS_H # define MINISHELL_STRUCTS_H + +enum e_token_id +{ + T_TOKEN = 0, + T_LESS = '<', + T_GREAT = '>', + T_PIPE = '|', + T_DLESS, //'<<' + T_DGREAT, //'>>' + T_WORD, + //T_NAME, + //T_NEWLINE, + //T_IO_NUMBER + //T_ASSIGNMENT_WORD +}; + +typedef struct s_token +{ + char *content; + struct s_token *next; + enum e_token_id id; +} t_token; + typedef struct s_all { char **envp; char *prompt_base; char *prompt; + t_token *token_list; } t_all; #endif diff --git a/srcs/builtins/exit.c b/srcs/builtins/exit.c index 2a3fd88..c51aaf2 100644 --- a/srcs/builtins/exit.c +++ b/srcs/builtins/exit.c @@ -6,7 +6,7 @@ /* By: lperrey +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2021/10/10 05:01:22 by lperrey #+# #+# */ -/* Updated: 2021/10/11 01:50:53 by lperrey ### ########.fr */ +/* Updated: 2021/10/15 08:45:33 by lperrey ### ########.fr */ /* */ /* ************************************************************************** */ @@ -17,7 +17,8 @@ int builtin_exit(int argc, char *argv[], t_all *c) // WIP unsigned char status; int i; - status = 0; + status = 0; // # should not return '0' by default, but the last exit code value (same as $?) + // status = c->last_exit_value; // LIKE THIS if (argc > 2) return (ft_reti_print(1, "exit: too many arguments\n", 2)); if (argc == 2) diff --git a/srcs/generic.c b/srcs/generic.c index ab88730..52202c9 100644 --- a/srcs/generic.c +++ b/srcs/generic.c @@ -6,7 +6,7 @@ /* By: lperrey +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2021/10/08 09:25:35 by lperrey #+# #+# */ -/* Updated: 2021/10/08 09:28:49 by lperrey ### ########.fr */ +/* Updated: 2021/10/19 19:54:59 by lperrey ### ########.fr */ /* */ /* ************************************************************************** */ @@ -39,3 +39,12 @@ char *ft_strjoinfree_s2(const char *s1, char *s2) free(s2); return (str); } + +void ft_lstprint(t_list *lst, int fd) +{ + while (lst) + { + ft_putendl_fd(lst->content, fd); + lst = lst->next; + } +} \ No newline at end of file diff --git a/srcs/init.c b/srcs/init.c index 6acedc0..31c6122 100644 --- a/srcs/init.c +++ b/srcs/init.c @@ -6,7 +6,7 @@ /* By: lperrey +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2021/10/08 09:22:12 by lperrey #+# #+# */ -/* Updated: 2021/10/10 21:56:33 by lperrey ### ########.fr */ +/* Updated: 2021/10/19 11:58:13 by lperrey ### ########.fr */ /* */ /* ************************************************************************** */ @@ -21,10 +21,10 @@ int init(t_all *c, char *envp[]) c->envp = envp; c->prompt_base = init_prompt_base(); if (!c->prompt_base) - return (ft_reti_perror(0, "init_prompt_base() fail")); + return (ft_reti_perror(0, "init_prompt_base() error")); c->prompt = init_prompt(c->prompt_base); if (!c->prompt) - return (ft_reti_perror(0, "init_prompt() fail")); + return (ft_reti_perror(0, "init_prompt() error")); return (1); } diff --git a/srcs/lexing.c b/srcs/lexing.c new file mode 100644 index 0000000..b706fb3 --- /dev/null +++ b/srcs/lexing.c @@ -0,0 +1,189 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* lexing.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: lperrey +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2021/10/19 08:38:55 by lperrey #+# #+# */ +/* Updated: 2021/10/19 20:11:31 by lperrey ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "minishell.h" +static t_token *alloc_token(size_t content_len); +static int tokenize_input(t_token *t, char *input, size_t input_len); +static int fill_token(t_token *t, char *input, int *i, int *t_i); +static int check_operators(t_token *t, char *input, int *i, int *t_i); + +enum e_in_quote_state +{ + NOT_IN = 0, + IN_QUOTES = '\'', + IN_DQUOTES = '\"' +}; + +enum e_fill_token_return +{ + CONTINUE_TOKEN = 1, + DELIMITE_TOKEN +}; + +t_token *input_to_tokens(char *input) +{ + t_token *t_head; + size_t input_len; + + input_len = ft_strlen(input); + t_head = alloc_token(input_len); + if (!t_head) + return (ft_retp_perror(NULL, "alloc_token() error")); + if (!tokenize_input(t_head, input, input_len)) + return (ft_lstclear((t_list **)&t_head, free)); + return (t_head); +} + +static int tokenize_input(t_token *t, char *input, size_t input_len) +{ + int i; + int t_i; + + i = 0; + t_i = 0; + while (input[i]) + { + if (fill_token(t, input, &i, &t_i) == DELIMITE_TOKEN && input[i] && t_i) + { + if (!t->id) + t->id = T_WORD; + t->next = alloc_token(input_len - i); + if (!t->next) + return (ft_reti_perror(0, "alloc_token() error")); + t = t->next; + t_i = 0; + } + } + if (!t->id) + t->id = T_WORD; + return (1); +} + +static int fill_token(t_token *t, char *input, int *i, int *t_i) +{ + static int in_quotes = 0; + + // operators + if (!in_quotes) + { + if (check_operators(t, input, i, t_i) == DELIMITE_TOKEN) + return (DELIMITE_TOKEN); + } + // quoting + if (input[*i] == '\'' && in_quotes != IN_DQUOTES) + { + t->content[(*t_i)++] = input[(*i)++]; + if (in_quotes == IN_QUOTES) + in_quotes = 0; + else if (ft_strchr(&input[*i], '\'')) // if closed quotes + in_quotes = IN_QUOTES; + return (CONTINUE_TOKEN); + } + else if (input[*i] == '\"' && in_quotes != IN_QUOTES) + { + t->content[(*t_i)++] = input[(*i)++]; + if (in_quotes == IN_DQUOTES) + in_quotes = 0; + else if (ft_strchr(&input[*i], '\"')) // if closed dquotes + in_quotes = IN_DQUOTES; + return (CONTINUE_TOKEN); + } + // blanks + if (!in_quotes && (input[*i] == ' ' || input[*i] == '\t')) + { + while (input[*i] == ' ' || input[*i] == '\t') + (*i)++; + return (DELIMITE_TOKEN); + } + else + t->content[(*t_i)++] = input[(*i)++]; + return (CONTINUE_TOKEN); +} + +static int check_operators(t_token *t, char *input, int *i, int *t_i) +{ + if (*t_i != 0 && (input[*i] == '|' || input[*i] == '<' || input[*i] == '>')) + return (DELIMITE_TOKEN); + if (input[*i] == '|') + { + t->content[(*t_i)++] = input[(*i)++]; + t->id = T_PIPE; + return (DELIMITE_TOKEN); + } + else if (input[*i] == '<') + { + t->content[(*t_i)++] = input[(*i)++]; + t->id = T_LESS; + if (input[*i] == '<') + { + t->content[(*t_i)++] = input[(*i)++]; + t->id = T_DLESS; + } + return (DELIMITE_TOKEN); + } + else if (input[*i] == '>') + { + t->content[(*t_i)++] = input[(*i)++]; + t->id = T_GREAT; + if (input[*i] == '>') + { + t->content[(*t_i)++] = input[(*i)++]; + t->id = T_DGREAT; + } + return (DELIMITE_TOKEN); + } + return (CONTINUE_TOKEN); +} + +static t_token *alloc_token(size_t content_len) +{ + t_token *token; + + token = ft_calloc(1, sizeof *token); + if (!token) + return (NULL); + token->content = ft_calloc(content_len + 1, 1); + if (!token->content) + return (ft_retp_free(NULL, token, free)); + return (token); +} + +/* +https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_03 +1 - OK +2 - OK +3 - OK +4 - OK +5 - OK / SEMI-OSEF +6 - OK +7 - OK +8 - OK +9 - OSEF +10 - OK + +*/ + +// Doublon avec ft_lstclear() +/* void *free_tokens(t_token *t) +{ + void *tmp; + + while (t) + { + if (t->content) + free (t->content); + tmp = t; + t = t->next; + free(tmp); + } + return (NULL); +} */ diff --git a/srcs/main.c b/srcs/main.c index 34f6d74..ccb71e0 100644 --- a/srcs/main.c +++ b/srcs/main.c @@ -6,7 +6,7 @@ /* By: lperrey +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2021/10/04 05:59:26 by lperrey #+# #+# */ -/* Updated: 2021/10/10 08:50:20 by lperrey ### ########.fr */ +/* Updated: 2021/10/19 20:23:19 by lperrey ### ########.fr */ /* */ /* ************************************************************************** */ @@ -28,8 +28,13 @@ void shell_loop(t_all *c) builtin_env(0, NULL, c); else if (!ft_strncmp(line_input, "exit", 5)) // temp placeholder builtin_exit(0, NULL, c); - else - printf("echo: %s\n", line_input); + else // temp placeholder + { + c->token_list = input_to_tokens(line_input); + ft_lstprint((t_list *)c->token_list, 1); + ft_lstclear((t_list **)&c->token_list, free); + //printf("echo: %s\n", line_input); + } } } }