/* ************************************************************************** */
/*                                                                            */
/*                                                        :::      ::::::::   */
/*   lexing.c                                           :+:      :+:    :+:   */
/*                                                    +:+ +:+         +:+     */
/*   By: lperrey                                    +#+  +:+       +#+        */
/*                                                +#+#+#+#+#+   +#+           */
/*   Created: 2021/10/19 08:38:55 by lperrey           #+#    #+#             */
/*   Updated: 2021/10/30 14:19:42 by lperrey          ###   ########.fr       */
/*                                                                            */
/* ************************************************************************** */

/*
** lexing.c -- splits a raw command line into a singly linked list of tokens.
**
** A token is a t_token node (declared in minishell.h) carrying:
**   - content : a heap buffer holding the token's characters
**   - id      : a token type (T_WORD, T_PIPE, T_LESS, T_DLESS, T_GREAT,
**               T_DGREAT); 0 means "not yet classified"
**   - next    : the following token
**
** Words are delimited by unquoted blanks (space / tab) and by operator
** characters. Quote characters are COPIED into the token content here;
** a later expansion stage presumably strips them -- TODO confirm.
*/

#include "minishell.h"

static t_token	*alloc_token(size_t content_len);
static int		tokenize_input(t_token *t, char *input, size_t input_len);
static int		fill_token(t_token *t, char *input, int *i, int *t_i);
static int		check_operators(t_token *t, char *input, int *i, int *t_i);

/*
** Quote state for fill_token(). The enumerators reuse the quote character
** itself as the state value, so the state can be compared directly against
** input characters.
*/
enum e_in_quote_state
{
	NOT_IN = 0,
	IN_QUOTES = '\'',
	IN_DQUOTES = '\"'
};

/*
** Return values of fill_token() / check_operators():
**   CONTINUE_TOKEN - the current token keeps accumulating characters
**   DELIMITE_TOKEN - the current token is finished; start a new one
*/
enum e_fill_token_return
{
	CONTINUE_TOKEN = 1,
	DELIMITE_TOKEN
};

/*
** Entry point of the lexer: turns `input` into a token list.
**
** Allocates a head token whose content buffer is sized for the whole input
** (a token can never be longer than the input), runs tokenize_input(), then
** removes a possible empty trailing token (produced when the input ends in
** blanks) by unlinking the last node when its id is still 0.
**
** Returns the list head, or NULL on allocation/tokenization failure.
**
** NOTE(review): `return (ft_lstclear(...))` compiles only if this project's
** ft_lstclear returns a pointer (presumably NULL) rather than the canonical
** libft `void` -- confirm against libft.h.
** NOTE(review): if the input contains ONLY blanks, the list is a single
** id==0 token; ft_lstbeforelast() on a one-node list presumably yields NULL,
** so the cleanup below only trims lists of length >= 2 -- confirm callers
** tolerate a lone id==0 token.
** NOTE(review): the (t_list *) casts assume t_token's leading members mirror
** t_list (content pointer first, next pointer after) -- TODO confirm layout
** in minishell.h.
*/
t_token	*input_to_tokens(char *input)
{
	t_token	*t_head;
	t_token	*t_tmp;
	size_t	input_len;

	input_len = ft_strlen(input);
	t_head = alloc_token(input_len);
	if (!t_head)
		return (ft_retp_perror(NULL, "alloc_token() error"));
	if (!tokenize_input(t_head, input, input_len))
		return (ft_lstclear((t_list **)&t_head, free));
	/* drop the trailing empty token left by input ending in blanks */
	t_tmp = (t_token *)ft_lstbeforelast((t_list *)t_head);
	if (t_tmp && !t_tmp->next->id)
	{
		ft_lstdelone((t_list *)t_tmp->next, free);
		t_tmp->next = NULL;
	}
	return (t_head);
}

/*
** Main lexing loop. Feeds characters to fill_token() until the input is
** consumed. When fill_token() reports DELIMITE_TOKEN and there is both
** remaining input and accumulated content (t_i > 0), the current token is
** finalized (defaulting its id to T_WORD when check_operators() did not
** already classify it) and a fresh token is allocated for the remainder.
**
** The new token's content buffer is sized `input_len - i`: the rest of the
** input is an upper bound on any remaining token's length, so fill_token()
** can append without bounds checks.
**
** Returns 1 on success, 0 on allocation failure (after perror).
*/
static int	tokenize_input(t_token *t, char *input, size_t input_len)
{
	int	i;
	int	t_i;

	i = 0;
	t_i = 0;
	while (input[i])
	{
		if (fill_token(t, input, &i, &t_i) == DELIMITE_TOKEN
			&& input[i] && t_i)
		{
			if (!t->id)
				t->id = T_WORD;
			t->next = alloc_token(input_len - i);
			if (!t->next)
				return (ft_reti_perror(0, "alloc_token() error"));
			t = t->next;
			t_i = 0;
		}
	}
	/* input exhausted: classify a final unterminated word, if any */
	if (!t->id && t_i)
		t->id = T_WORD;
	return (1);
}

/*
** Consumes characters at input[*i] into t->content[*t_i], handling
** operators, quotes and blanks. Returns CONTINUE_TOKEN or DELIMITE_TOKEN.
**
** `in_quotes` is STATIC: quote state survives across calls (and across
** tokens) within one tokenize_input() run. It is guaranteed to be NOT_IN
** again when the input is exhausted, because a quote state is only entered
** when ft_strchr() finds the matching closing quote ahead; an unmatched
** quote character is copied as a literal instead.
** NOTE(review): being static, the state would also survive across separate
** input_to_tokens() calls -- safe only as long as the invariant above holds.
*/
static int	fill_token(t_token *t, char *input, int *i, int *t_i)
{
	static int	in_quotes = 0;

	// operators (only recognized outside quotes)
	if (!in_quotes)
	{
		if (check_operators(t, input, i, t_i) == DELIMITE_TOKEN)
			return (DELIMITE_TOKEN);
	}
	// quoting: the quote character itself is kept in the token content
	if (input[*i] == '\'' && in_quotes != IN_DQUOTES)
	{
		t->content[(*t_i)++] = input[(*i)++];
		if (in_quotes == IN_QUOTES)
			in_quotes = 0;
		else if (ft_strchr(&input[*i], '\'')) // if closed quotes
			in_quotes = IN_QUOTES;
		return (CONTINUE_TOKEN);
	}
	else if (input[*i] == '\"' && in_quotes != IN_QUOTES)
	{
		t->content[(*t_i)++] = input[(*i)++];
		if (in_quotes == IN_DQUOTES)
			in_quotes = 0;
		else if (ft_strchr(&input[*i], '\"')) // if closed dquotes
			in_quotes = IN_DQUOTES;
		return (CONTINUE_TOKEN);
	}
	// blanks: an unquoted run of spaces/tabs delimits the current token
	if (!in_quotes && (input[*i] == ' ' || input[*i] == '\t'))
	{
		while (input[*i] == ' ' || input[*i] == '\t')
			(*i)++;
		return (DELIMITE_TOKEN);
	}
	else
		t->content[(*t_i)++] = input[(*i)++];
	return (CONTINUE_TOKEN);
}

/*
** Handles the operator characters '|', '<' and '>'.
**
** If the current token already has content (*t_i != 0), the operator is NOT
** consumed: the pending word is delimited first, and the operator will be
** lexed into its own token on the next call (with *t_i == 0).
**
** Otherwise the operator is consumed into the token, doubling '<' into
** T_DLESS and '>' into T_DGREAT, and the token is delimited immediately.
** Returns CONTINUE_TOKEN when input[*i] is not an operator character.
*/
static int	check_operators(t_token *t, char *input, int *i, int *t_i)
{
	if (*t_i != 0 && (input[*i] == '|' || input[*i] == '<'
			|| input[*i] == '>'))
		return (DELIMITE_TOKEN);
	if (input[*i] == '|')
	{
		t->content[(*t_i)++] = input[(*i)++];
		t->id = T_PIPE;
		return (DELIMITE_TOKEN);
	}
	else if (input[*i] == '<')
	{
		t->content[(*t_i)++] = input[(*i)++];
		t->id = T_LESS;
		if (input[*i] == '<')
		{
			t->content[(*t_i)++] = input[(*i)++];
			t->id = T_DLESS;
		}
		return (DELIMITE_TOKEN);
	}
	else if (input[*i] == '>')
	{
		t->content[(*t_i)++] = input[(*i)++];
		t->id = T_GREAT;
		if (input[*i] == '>')
		{
			t->content[(*t_i)++] = input[(*i)++];
			t->id = T_DGREAT;
		}
		return (DELIMITE_TOKEN);
	}
	return (CONTINUE_TOKEN);
}

/*
** Allocates one zero-initialized token whose content buffer can hold
** `content_len` characters plus a NUL terminator (ft_calloc zero-fills, so
** the buffer is always terminated and `id`/`next` start at 0/NULL).
** Returns the token, or NULL on failure (the node is freed if the content
** allocation fails).
*/
static t_token	*alloc_token(size_t content_len)
{
	t_token	*token;

	token = ft_calloc(1, sizeof *token);
	if (!token)
		return (NULL);
	token->content = ft_calloc(content_len + 1, 1);
	if (!token->content)
		return (ft_retp_free(NULL, token, free));
	return (token);
}

/*
** POSIX Shell Command Language, 2.3 Token Recognition:
** https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_03
** Rule coverage status:
**   1 - OK
**   2 - OK
**   3 - OK
**   4 - OK
**   5 - OK / mostly irrelevant here
**   6 - OK
**   7 - OK
**   8 - OK
**   9 - irrelevant here (comments)
**  10 - OK
*/

// Duplicate of ft_lstclear()
/*
void	*free_tokens(t_token *t)
{
	void	*tmp;

	while (t)
	{
		if (t->content)
			free (t->content);
		tmp = t;
		t = t->next;
		free(tmp);
	}
	return (NULL);
}
*/