lexing refactoring
This commit is contained in:
4
Makefile
4
Makefile
@@ -5,7 +5,7 @@ CC = clang
|
||||
CFLAGS = -Wall -Wextra $(INCLUDES) -g # add -Werror, del -g
|
||||
|
||||
VPATH = $(DIR_SRCS)
|
||||
DIR_SRCS = srcs srcs/builtins srcs/parsing
|
||||
DIR_SRCS = srcs srcs/builtins srcs/lexing srcs/parsing
|
||||
|
||||
INCLUDES = -I$(HEADERS_D) -I$(LIBFT_D)
|
||||
|
||||
@@ -24,7 +24,7 @@ LIBFT = $(LIBFT_D)/libft.a
|
||||
SRCS = main.c init.c free.c generic.c \
|
||||
signals.c terminal.c \
|
||||
shell_loop.c shell_script.c \
|
||||
lexing.c \
|
||||
lexing.c fill_token.c check_operators.c \
|
||||
parsing.c \
|
||||
valid_syntax.c valid_pipeline.c valid_command.c valid_io_redirect.c \
|
||||
env.c exit.c echo.c
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
/* By: lperrey <lperrey@student.42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2021/10/08 02:35:55 by lperrey #+# #+# */
|
||||
/* Updated: 2021/10/08 03:01:43 by lperrey ### ########.fr */
|
||||
/* Updated: 2021/10/30 22:32:48 by lperrey ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
@@ -18,4 +18,10 @@
|
||||
# define PROMPT_CHEVRON "> "
|
||||
# define PROMPT_EURO "\001€\002 \001\b\002"
|
||||
|
||||
/*
** Status codes exchanged between the lexer helpers and the tokenizing loop.
**   CONTINUE_TOKEN : keep feeding characters to the token being built.
**   DELIMITE_TOKEN : the token being built is finished at this position.
** Both values are non-zero.
*/
enum e_lexer_return
{
	CONTINUE_TOKEN = 1,
	DELIMITE_TOKEN = 2
};
|
||||
|
||||
#endif
|
||||
|
||||
196
srcs/lexing.c
196
srcs/lexing.c
@@ -1,196 +0,0 @@
|
||||
/* ************************************************************************** */
|
||||
/* */
|
||||
/* ::: :::::::: */
|
||||
/* lexing.c :+: :+: :+: */
|
||||
/* +:+ +:+ +:+ */
|
||||
/* By: lperrey <lperrey@student.42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2021/10/19 08:38:55 by lperrey #+# #+# */
|
||||
/* Updated: 2021/10/30 14:19:42 by lperrey ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "minishell.h"
|
||||
static t_token *alloc_token(size_t content_len);
|
||||
static int tokenize_input(t_token *t, char *input, size_t input_len);
|
||||
static int fill_token(t_token *t, char *input, int *i, int *t_i);
|
||||
static int check_operators(t_token *t, char *input, int *i, int *t_i);
|
||||
|
||||
/*
** Quoting state tracked by fill_token().
** Each "inside" state has the value of the quote character that opened it,
** NOT_IN (0) means that no quote is currently open.
*/
enum e_in_quote_state
{
	NOT_IN = 0,
	IN_QUOTES = '\'',
	IN_DQUOTES = '"'
};
|
||||
|
||||
/*
** Return codes of fill_token() and check_operators().
**   CONTINUE_TOKEN : keep feeding characters to the token being built.
**   DELIMITE_TOKEN : the token being built is finished at this position.
*/
enum e_fill_token_return
{
	CONTINUE_TOKEN = 1,
	DELIMITE_TOKEN = 2
};
|
||||
|
||||
t_token *input_to_tokens(char *input)
|
||||
{
|
||||
t_token *t_head;
|
||||
t_token *t_tmp;
|
||||
size_t input_len;
|
||||
|
||||
input_len = ft_strlen(input);
|
||||
t_head = alloc_token(input_len);
|
||||
if (!t_head)
|
||||
return (ft_retp_perror(NULL, "alloc_token() error"));
|
||||
if (!tokenize_input(t_head, input, input_len))
|
||||
return (ft_lstclear((t_list **)&t_head, free));
|
||||
t_tmp = (t_token *)ft_lstbeforelast((t_list *)t_head);
|
||||
if (t_tmp && !t_tmp->next->id)
|
||||
{
|
||||
ft_lstdelone((t_list *)t_tmp->next, free);
|
||||
t_tmp->next = NULL;
|
||||
}
|
||||
return (t_head);
|
||||
}
|
||||
|
||||
/*
** Walks through `input` and fills the token list starting at `t`.
**
** fill_token() consumes the input and reports when the current token is
** delimited. A new token is chained only if some input remains and the
** current token is not empty; its buffer is sized on the remaining input.
** A token that was given no id by the operator checks becomes a T_WORD.
**
** Returns 1 on success, or the value of ft_reti_perror(0, ...) when a token
** allocation fails (presumably 0).
*/
static int	tokenize_input(t_token *t, char *input, size_t input_len)
{
	int	in_pos;
	int	out_pos;

	in_pos = 0;
	out_pos = 0;
	while (input[in_pos] != '\0')
	{
		if (fill_token(t, input, &in_pos, &out_pos) != DELIMITE_TOKEN)
			continue ;
		if (input[in_pos] == '\0' || out_pos == 0)
			continue ;
		if (!t->id)
			t->id = T_WORD;
		t->next = alloc_token(input_len - in_pos);
		if (t->next == NULL)
			return (ft_reti_perror(0, "alloc_token() error"));
		t = t->next;
		out_pos = 0;
	}
	if (out_pos != 0 && !t->id)
		t->id = T_WORD;
	return (1);
}
|
||||
|
||||
/*
** Processes the input at index *i for the token `t`.
**
** Outside quotes, operators are handled first by check_operators(), and a
** run of blanks (space / tab) is skipped and delimits the token.
** Any other character, quotes included, is copied into t->content.
** A quote opens a quoted section only if the same quote appears again
** further in the input; the matching quote closes it.
**
** The quoting state is kept in a static variable, so it persists between
** calls: the function is not reentrant.
**
** Returns DELIMITE_TOKEN when the current token ends, else CONTINUE_TOKEN.
*/
static int	fill_token(t_token *t, char *input, int *i, int *t_i)
{
	static int	in_quotes = 0;
	char		c;

	if (in_quotes == 0
		&& check_operators(t, input, i, t_i) == DELIMITE_TOKEN)
		return (DELIMITE_TOKEN);
	c = input[*i];
	if (in_quotes == 0 && (c == ' ' || c == '\t'))
	{
		while (input[*i] == ' ' || input[*i] == '\t')
			(*i)++;
		return (DELIMITE_TOKEN);
	}
	t->content[(*t_i)++] = input[(*i)++];
	if (c == '\'' && in_quotes == IN_QUOTES)
		in_quotes = 0;
	else if (c == '\'' && in_quotes == 0 && ft_strchr(&input[*i], '\''))
		in_quotes = IN_QUOTES;
	else if (c == '"' && in_quotes == IN_DQUOTES)
		in_quotes = 0;
	else if (c == '"' && in_quotes == 0 && ft_strchr(&input[*i], '"'))
		in_quotes = IN_DQUOTES;
	return (CONTINUE_TOKEN);
}
|
||||
|
||||
/*
** Recognizes the operators |, <, <<, > and >> at input[*i].
**
** - Not an operator character: nothing is consumed, CONTINUE_TOKEN.
** - Operator while the current token already has content (*t_i != 0):
**   nothing is consumed, DELIMITE_TOKEN, so that the operator starts the
**   next token.
** - Otherwise the operator is copied into t->content, t->id is set to the
**   matching T_* id and DELIMITE_TOKEN is returned. Only '<' and '>' can be
**   doubled; a second '|' is left in the input.
*/
static int	check_operators(t_token *t, char *input, int *i, int *t_i)
{
	char	op;

	op = input[*i];
	if (op != '|' && op != '<' && op != '>')
		return (CONTINUE_TOKEN);
	if (*t_i != 0)
		return (DELIMITE_TOKEN);
	t->content[(*t_i)++] = input[(*i)++];
	if (op == '|')
		t->id = T_PIPE;
	else if (op == '<')
		t->id = T_LESS;
	else
		t->id = T_GREAT;
	if (op != '|' && input[*i] == op)
	{
		t->content[(*t_i)++] = input[(*i)++];
		if (op == '<')
			t->id = T_DLESS;
		else
			t->id = T_DGREAT;
	}
	return (DELIMITE_TOKEN);
}
|
||||
|
||||
static t_token *alloc_token(size_t content_len)
|
||||
{
|
||||
t_token *token;
|
||||
|
||||
token = ft_calloc(1, sizeof *token);
|
||||
if (!token)
|
||||
return (NULL);
|
||||
token->content = ft_calloc(content_len + 1, 1);
|
||||
if (!token->content)
|
||||
return (ft_retp_free(NULL, token, free));
|
||||
return (token);
|
||||
}
|
||||
|
||||
/*
|
||||
https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_03
|
||||
1 - OK
|
||||
2 - OK
|
||||
3 - OK
|
||||
4 - OK
|
||||
5 - OK / partly ignored (not needed here)
|
||||
6 - OK
|
||||
7 - OK
|
||||
8 - OK
|
||||
9 - ignored (not needed here)
|
||||
10 - OK
|
||||
|
||||
*/
|
||||
|
||||
// Duplicate of ft_lstclear()
|
||||
/* void *free_tokens(t_token *t)
|
||||
{
|
||||
void *tmp;
|
||||
|
||||
while (t)
|
||||
{
|
||||
if (t->content)
|
||||
free (t->content);
|
||||
tmp = t;
|
||||
t = t->next;
|
||||
free(tmp);
|
||||
}
|
||||
return (NULL);
|
||||
} */
|
||||
65
srcs/lexing/check_operators.c
Normal file
65
srcs/lexing/check_operators.c
Normal file
@@ -0,0 +1,65 @@
|
||||
/* ************************************************************************** */
|
||||
/* */
|
||||
/* ::: :::::::: */
|
||||
/* check_operators.c :+: :+: :+: */
|
||||
/* +:+ +:+ +:+ */
|
||||
/* By: lperrey <lperrey@student.42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2021/10/19 08:38:55 by lperrey #+# #+# */
|
||||
/* Updated: 2021/10/30 22:37:08 by lperrey ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "minishell.h"
|
||||
|
||||
static int check_redirection(t_token *t, char *input, int *i, int *t_i);
|
||||
static int check_pipe(t_token *t, char *input, int *i, int *t_i);
|
||||
|
||||
/*
** Recognizes an operator (|, <, <<, >, >>) at input[*i].
**
** - Not an operator character: nothing is consumed, CONTINUE_TOKEN.
** - Operator while the current token already has content (*t_i != 0):
**   nothing is consumed, DELIMITE_TOKEN, so that the operator starts the
**   next token.
** - Otherwise check_pipe() / check_redirection() consume the operator and
**   set t->id, and DELIMITE_TOKEN is returned.
*/
int	check_operators(t_token *t, char *input, int *i, int *t_i)
{
	char	c;

	c = input[*i];
	if (c != '|' && c != '<' && c != '>')
		return (CONTINUE_TOKEN);
	if (*t_i != 0)
		return (DELIMITE_TOKEN);
	if (check_pipe(t, input, i, t_i))
		return (DELIMITE_TOKEN);
	if (check_redirection(t, input, i, t_i))
		return (DELIMITE_TOKEN);
	return (CONTINUE_TOKEN);
}
|
||||
|
||||
/*
** If input[*i] is '|', copies it into t->content, advances both indexes,
** tags the token as T_PIPE and returns 1.
** Returns 0 without touching anything otherwise.
*/
static int	check_pipe(t_token *t, char *input, int *i, int *t_i)
{
	if (input[*i] != '|')
		return (0);
	t->content[*t_i] = input[*i];
	(*t_i)++;
	(*i)++;
	t->id = T_PIPE;
	return (1);
}
|
||||
|
||||
/*
** If input[*i] starts a redirection operator, copies it into t->content,
** advances both indexes and sets t->id:
**   "<"  -> T_LESS     "<<" -> T_DLESS
**   ">"  -> T_GREAT    ">>" -> T_DGREAT
** Returns 1 when an operator was consumed, 0 (nothing touched) otherwise.
*/
static int	check_redirection(t_token *t, char *input, int *i, int *t_i)
{
	char	op;
	int		doubled;

	op = input[*i];
	if (op != '<' && op != '>')
		return (0);
	doubled = (input[*i + 1] == op);
	t->content[(*t_i)++] = input[(*i)++];
	if (doubled)
		t->content[(*t_i)++] = input[(*i)++];
	if (op == '<' && doubled)
		t->id = T_DLESS;
	else if (op == '<')
		t->id = T_LESS;
	else if (doubled)
		t->id = T_DGREAT;
	else
		t->id = T_GREAT;
	return (1);
}
|
||||
77
srcs/lexing/fill_token.c
Normal file
77
srcs/lexing/fill_token.c
Normal file
@@ -0,0 +1,77 @@
|
||||
/* ************************************************************************** */
|
||||
/* */
|
||||
/* ::: :::::::: */
|
||||
/* fill_token.c :+: :+: :+: */
|
||||
/* +:+ +:+ +:+ */
|
||||
/* By: lperrey <lperrey@student.42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2021/10/19 08:38:55 by lperrey #+# #+# */
|
||||
/* Updated: 2021/10/30 22:35:01 by lperrey ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "minishell.h"
|
||||
int check_operators(t_token *t, char *input, int *i, int *t_i);
|
||||
|
||||
/*
** Quoting state tracked by fill_token().
** Each "inside" state has the value of the quote character that opened it,
** NOT_IN (0) means that no quote is currently open.
*/
enum e_in_quote_state
{
	NOT_IN = 0,
	IN_QUOTES = '\'',
	IN_DQUOTES = '"'
};
|
||||
|
||||
/*
** Processes the input at index *i for the token `t`.
**
** Outside quotes, operators are handled first by check_operators(), and a
** run of blanks (space / tab) is skipped and delimits the token.
** Any other character, quotes included, is copied into t->content.
** A quote opens a quoted section only if the same quote appears again
** further in the input; the matching quote closes it.
**
** The quoting state is kept in a static variable, so it persists between
** calls: the function is not reentrant.
**
** Returns DELIMITE_TOKEN when the current token ends, else CONTINUE_TOKEN.
*/
int	fill_token(t_token *t, char *input, int *i, int *t_i)
{
	static int	in_quotes = 0;
	char		c;

	if (in_quotes == 0
		&& check_operators(t, input, i, t_i) == DELIMITE_TOKEN)
		return (DELIMITE_TOKEN);
	c = input[*i];
	if (in_quotes == 0 && (c == ' ' || c == '\t'))
	{
		while (input[*i] == ' ' || input[*i] == '\t')
			(*i)++;
		return (DELIMITE_TOKEN);
	}
	t->content[(*t_i)++] = input[(*i)++];
	if (c == '\'' && in_quotes == IN_QUOTES)
		in_quotes = 0;
	else if (c == '\'' && in_quotes == 0 && ft_strchr(&input[*i], '\''))
		in_quotes = IN_QUOTES;
	else if (c == '"' && in_quotes == IN_DQUOTES)
		in_quotes = 0;
	else if (c == '"' && in_quotes == 0 && ft_strchr(&input[*i], '"'))
		in_quotes = IN_DQUOTES;
	return (CONTINUE_TOKEN);
}
|
||||
|
||||
/*
|
||||
https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_03
|
||||
1 - OK
|
||||
2 - OK
|
||||
3 - OK
|
||||
4 - OK
|
||||
5 - OK / partly ignored (not needed here)
|
||||
6 - OK
|
||||
7 - OK
|
||||
8 - OK
|
||||
9 - ignored (not needed here)
|
||||
10 - OK
|
||||
|
||||
*/
|
||||
76
srcs/lexing/lexing.c
Normal file
76
srcs/lexing/lexing.c
Normal file
@@ -0,0 +1,76 @@
|
||||
/* ************************************************************************** */
|
||||
/* */
|
||||
/* ::: :::::::: */
|
||||
/* lexing.c :+: :+: :+: */
|
||||
/* +:+ +:+ +:+ */
|
||||
/* By: lperrey <lperrey@student.42.fr> +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2021/10/19 08:38:55 by lperrey #+# #+# */
|
||||
/* Updated: 2021/10/30 22:37:48 by lperrey ### ########.fr */
|
||||
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
#include "minishell.h"
|
||||
int fill_token(t_token *t, char *input, int *i, int *t_i);
|
||||
|
||||
static t_token *alloc_token(size_t content_len);
|
||||
static int tokenize_input(t_token *t, char *input, size_t input_len);
|
||||
|
||||
t_token *input_to_tokens(char *input)
|
||||
{
|
||||
t_token *t_head;
|
||||
t_token *t_tmp;
|
||||
size_t input_len;
|
||||
|
||||
input_len = ft_strlen(input);
|
||||
t_head = alloc_token(input_len);
|
||||
if (!t_head)
|
||||
return (ft_retp_perror(NULL, "alloc_token() error"));
|
||||
if (!tokenize_input(t_head, input, input_len))
|
||||
return (ft_lstclear((t_list **)&t_head, free));
|
||||
t_tmp = (t_token *)ft_lstbeforelast((t_list *)t_head);
|
||||
if (t_tmp && !t_tmp->next->id)
|
||||
{
|
||||
ft_lstdelone((t_list *)t_tmp->next, free);
|
||||
t_tmp->next = NULL;
|
||||
}
|
||||
return (t_head);
|
||||
}
|
||||
|
||||
static t_token *alloc_token(size_t content_len)
|
||||
{
|
||||
t_token *token;
|
||||
|
||||
token = ft_calloc(1, sizeof *token);
|
||||
if (!token)
|
||||
return (NULL);
|
||||
token->content = ft_calloc(content_len + 1, 1);
|
||||
if (!token->content)
|
||||
return (ft_retp_free(NULL, token, free));
|
||||
return (token);
|
||||
}
|
||||
|
||||
/*
** Walks through `input` and fills the token list starting at `t`.
**
** fill_token() consumes the input and reports when the current token is
** delimited. A new token is chained only if some input remains and the
** current token is not empty; its buffer is sized on the remaining input.
** A token that was given no id by the operator checks becomes a T_WORD.
**
** Returns 1 on success, or the value of ft_reti_perror(0, ...) when a token
** allocation fails (presumably 0).
*/
static int	tokenize_input(t_token *t, char *input, size_t input_len)
{
	int	in_pos;
	int	out_pos;

	in_pos = 0;
	out_pos = 0;
	while (input[in_pos] != '\0')
	{
		if (fill_token(t, input, &in_pos, &out_pos) != DELIMITE_TOKEN)
			continue ;
		if (input[in_pos] == '\0' || out_pos == 0)
			continue ;
		if (!t->id)
			t->id = T_WORD;
		t->next = alloc_token(input_len - in_pos);
		if (t->next == NULL)
			return (ft_reti_perror(0, "alloc_token() error"));
		t = t->next;
		out_pos = 0;
	}
	if (out_pos != 0 && !t->id)
		t->id = T_WORD;
	return (1);
}
|
||||
Reference in New Issue
Block a user