190 lines
4.4 KiB
C
190 lines
4.4 KiB
C
/* ************************************************************************** */
|
|
/* */
|
|
/* ::: :::::::: */
|
|
/* lexing.c :+: :+: :+: */
|
|
/* +:+ +:+ +:+ */
|
|
/* By: lperrey <lperrey@student.42.fr> +#+ +:+ +#+ */
|
|
/* +#+#+#+#+#+ +#+ */
|
|
/* Created: 2021/10/19 08:38:55 by lperrey #+# #+# */
|
|
/* Updated: 2021/10/19 20:11:31 by lperrey ### ########.fr */
|
|
/* */
|
|
/* ************************************************************************** */
|
|
|
|
#include "minishell.h"
|
|
/*
** File-local helpers, listed in the order the lexer calls them:
** tokenize_input() drives fill_token(), which consults check_operators();
** alloc_token() creates each list node.
*/
static int		tokenize_input(t_token *t, char *input, size_t input_len);
static int		fill_token(t_token *t, char *input, int *i, int *t_i);
static int		check_operators(t_token *t, char *input, int *i, int *t_i);
static t_token	*alloc_token(size_t content_len);
|
|
|
|
/*
** Quoting state tracked by fill_token().
** The two "inside" states reuse the character code of the quote that opened
** them, so the state can be compared directly with an input character.
*/
enum e_in_quote_state
{
	NOT_IN = 0,
	IN_QUOTES = '\'',
	IN_DQUOTES = '"'
};
|
|
|
|
/*
** Values returned by fill_token() and check_operators().
** CONTINUE_TOKEN: the current token may still receive characters.
** DELIMITE_TOKEN: the current token ends here.
*/
enum e_fill_token_return
{
	CONTINUE_TOKEN = 1,
	DELIMITE_TOKEN = 2
};
|
|
|
|
t_token *input_to_tokens(char *input)
|
|
{
|
|
t_token *t_head;
|
|
size_t input_len;
|
|
|
|
input_len = ft_strlen(input);
|
|
t_head = alloc_token(input_len);
|
|
if (!t_head)
|
|
return (ft_retp_perror(NULL, "alloc_token() error"));
|
|
if (!tokenize_input(t_head, input, input_len))
|
|
return (ft_lstclear((t_list **)&t_head, free));
|
|
return (t_head);
|
|
}
|
|
|
|
/*
** Spreads `input` over a chain of tokens starting at `t`.
**
** t:         first token, already allocated by the caller.
** input:     NUL-terminated command line.
** input_len: length of `input`, used to size the buffers of new tokens.
**
** fill_token() is called repeatedly; each call consumes input and reports
** whether the current token is finished. When it is, a new token is appended
** only if the current one is non-empty and there is still something other
** than blanks left to read. A finished token with no id yet is a T_WORD.
**
** Blanks are skipped before deciding whether another token is needed.
** Without that, an operator followed only by trailing blanks ("ls | ")
** would leave an empty T_WORD token at the end of the list.
**
** Returns 1 on success, or the result of ft_reti_perror(0, ...) when a token
** allocation fails.
*/
static int	tokenize_input(t_token *t, char *input, size_t input_len)
{
	int	i;
	int	t_i;

	i = 0;
	t_i = 0;
	while (input[i])
	{
		if (fill_token(t, input, &i, &t_i) != DELIMITE_TOKEN)
			continue ;
		// A delimiter is never reported inside quotes, so these blanks
		// are always separators and can be dropped here.
		while (input[i] == ' ' || input[i] == '\t')
			i++;
		// Nothing left to read, or the current token is still empty:
		// keep using the current token.
		if (!input[i] || !t_i)
			continue ;
		if (!t->id)
			t->id = T_WORD;
		// The remaining input length bounds the size of any later token.
		t->next = alloc_token(input_len - (size_t)i);
		if (!t->next)
			return (ft_reti_perror(0, "alloc_token() error"));
		t = t->next;
		t_i = 0;
	}
	if (!t->id)
		t->id = T_WORD;
	return (1);
}
|
|
|
|
/*
** Performs one lexing step at input[*i] for the token `t`.
**
** t:     token being filled; characters are appended to t->content.
** input: NUL-terminated command line.
** i:     in/out read position in `input`.
** t_i:   in/out write position in t->content.
**
** Outside quotes, operators are handled first by check_operators().
** A quote character is copied into the token and toggles the quoting state.
** An opening quote only starts a quoted section when ft_strchr() finds a
** matching quote further in the input; otherwise it is kept as a plain
** character.
** Outside quotes, a run of spaces/tabs is skipped and ends the token.
** Any other character is copied into the token.
**
** The quoting state lives in a static variable, so it carries over from one
** call to the next.
**
** Returns DELIMITE_TOKEN when the current token ends, CONTINUE_TOKEN
** otherwise.
*/
static int	fill_token(t_token *t, char *input, int *i, int *t_i)
{
	static int	in_quotes = NOT_IN;
	char		c;

	if (in_quotes == NOT_IN
		&& check_operators(t, input, i, t_i) == DELIMITE_TOKEN)
		return (DELIMITE_TOKEN);
	c = input[*i];
	// A quote only counts when not nested inside the other kind of quote.
	if ((c == '\'' && in_quotes != IN_DQUOTES)
		|| (c == '\"' && in_quotes != IN_QUOTES))
	{
		t->content[(*t_i)++] = input[(*i)++];
		// The state values are the quote characters themselves.
		if (in_quotes == c)
			in_quotes = NOT_IN;
		else if (ft_strchr(&input[*i], c))
			in_quotes = c;
		return (CONTINUE_TOKEN);
	}
	if (in_quotes == NOT_IN && (c == ' ' || c == '\t'))
	{
		while (input[*i] == ' ' || input[*i] == '\t')
			(*i)++;
		return (DELIMITE_TOKEN);
	}
	t->content[(*t_i)++] = input[(*i)++];
	return (CONTINUE_TOKEN);
}
|
|
|
|
/*
** Recognizes the operators |, <, <<, > and >> at input[*i].
**
** t:     token being filled.
** input: NUL-terminated command line.
** i:     in/out read position in `input`.
** t_i:   in/out write position in t->content.
**
** If the current token already holds characters, an operator character ends
** it without being consumed. Otherwise the operator (one or two characters)
** is copied into the token and t->id is set to T_PIPE, T_LESS, T_DLESS,
** T_GREAT or T_DGREAT.
**
** Returns DELIMITE_TOKEN when input[*i] is an operator character,
** CONTINUE_TOKEN (with nothing consumed) otherwise.
*/
static int	check_operators(t_token *t, char *input, int *i, int *t_i)
{
	char	op;

	op = input[*i];
	if (op != '|' && op != '<' && op != '>')
		return (CONTINUE_TOKEN);
	if (*t_i != 0)
		return (DELIMITE_TOKEN);
	t->content[(*t_i)++] = input[(*i)++];
	if (op == '|')
	{
		t->id = T_PIPE;
		return (DELIMITE_TOKEN);
	}
	if (op == '<')
		t->id = T_LESS;
	else
		t->id = T_GREAT;
	// A repeated redirection character forms the two-character operator.
	if (input[*i] == op)
	{
		t->content[(*t_i)++] = input[(*i)++];
		if (op == '<')
			t->id = T_DLESS;
		else
			t->id = T_DGREAT;
	}
	return (DELIMITE_TOKEN);
}
|
|
|
|
static t_token *alloc_token(size_t content_len)
|
|
{
|
|
t_token *token;
|
|
|
|
token = ft_calloc(1, sizeof *token);
|
|
if (!token)
|
|
return (NULL);
|
|
token->content = ft_calloc(content_len + 1, 1);
|
|
if (!token->content)
|
|
return (ft_retp_free(NULL, token, free));
|
|
return (token);
|
|
}
|
|
|
|
/*
** Status of the POSIX token recognition rules (numbered as in the spec):
** https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_03
**  1 - OK
**  2 - OK
**  3 - OK
**  4 - OK
**  5 - OK / mostly not a concern here
**  6 - OK
**  7 - OK
**  8 - OK
**  9 - not a concern here
** 10 - OK
*/
|
|
|
|
// Duplicate of ft_lstclear()
|
|
/* void *free_tokens(t_token *t)
|
|
{
|
|
void *tmp;
|
|
|
|
while (t)
|
|
{
|
|
if (t->content)
|
|
free (t->content);
|
|
tmp = t;
|
|
t = t->next;
|
|
free(tmp);
|
|
}
|
|
return (NULL);
|
|
} */
|