Files
42_INT_07_minishell/srcs/lexing.c
LuckyLaszlo 1054f3d6ff lexer (need refactoring)
+ littles miscs
2021-10-19 20:25:41 +02:00

190 lines
4.4 KiB
C

/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* lexing.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: lperrey <lperrey@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2021/10/19 08:38:55 by lperrey #+# #+# */
/* Updated: 2021/10/19 20:11:31 by lperrey ### ########.fr */
/* */
/* ************************************************************************** */
#include "minishell.h"
/* Forward declarations of the file-local helpers defined further down. */
static t_token *alloc_token(size_t content_len);
static int tokenize_input(t_token *t, char *input, size_t input_len);
static int fill_token(t_token *t, char *input, int *i, int *t_i);
static int check_operators(t_token *t, char *input, int *i, int *t_i);
/*
** Quoting state tracked while scanning the input.
** Each "inside quotes" state has the value of the quote character that
** opened it, so a state can be compared directly with an input character.
*/
enum e_in_quote_state
{
	NOT_IN = 0,
	IN_QUOTES = '\'',
	IN_DQUOTES = '"'
};
/*
** Result codes returned by fill_token() and check_operators().
** CONTINUE_TOKEN: the current token may still receive characters.
** DELIMITE_TOKEN: the current token is complete.
*/
enum e_fill_token_return
{
	CONTINUE_TOKEN = 1,
	DELIMITE_TOKEN = 2
};
/*
** Splits a command line into a linked list of tokens.
**
** input: NUL-terminated command line (its length is taken with ft_strlen).
**
** Returns the head of the token list on success. If the first token cannot
** be allocated, returns the result of ft_retp_perror(NULL, ...). If
** tokenization fails, the list is handed to ft_lstclear() with free as the
** deleter and that call's result is returned.
** NOTE(review): the cast to (t_list **) assumes t_token and t_list share a
** compatible layout -- confirm against minishell.h.
*/
t_token	*input_to_tokens(char *input)
{
	size_t	len;
	t_token	*head;

	len = ft_strlen(input);
	head = alloc_token(len);
	if (head == NULL)
		return (ft_retp_perror(NULL, "alloc_token() error"));
	if (tokenize_input(head, input, len))
		return (head);
	return (ft_lstclear((t_list **)&head, free));
}
/*
** Walks the whole input and fills the token list starting at t.
**
** t:         first token, already allocated by the caller.
** input:     NUL-terminated command line.
** input_len: length of input; used to size each new token's buffer to the
**            number of characters still unread.
**
** A new token is appended only when fill_token() reports a delimitation,
** some input remains, and the current token already holds at least one
** character. Any token whose id is still 0 when it is closed becomes T_WORD
** (presumably ids start at 0 because ft_calloc zero-fills -- confirm).
**
** Returns 1 on success, or the result of ft_reti_perror(0, ...) if a token
** allocation fails.
**
** NOTE(review): when an operator is followed only by blanks (e.g. "ls | "),
** a token is opened after the operator and stays empty; an empty input also
** yields a single empty T_WORD token. Confirm the parser tolerates this.
*/
static int	tokenize_input(t_token *t, char *input, size_t input_len)
{
	int	pos;
	int	filled;

	pos = 0;
	filled = 0;
	while (input[pos])
	{
		if (fill_token(t, input, &pos, &filled) != DELIMITE_TOKEN)
			continue ;
		if (input[pos] == '\0' || filled == 0)
			continue ;
		if (!t->id)
			t->id = T_WORD;
		t->next = alloc_token(input_len - pos);
		if (t->next == NULL)
			return (ft_reti_perror(0, "alloc_token() error"));
		t = t->next;
		filled = 0;
	}
	if (!t->id)
		t->id = T_WORD;
	return (1);
}
/*
** Processes the input at position *i for the current token t.
**
** t:     token being filled; characters are appended to t->content.
** input: NUL-terminated command line.
** i:     in/out read position in input.
** t_i:   in/out write position in t->content.
**
** The quoting state is kept in a static variable, so it carries over from
** one call to the next.
**
** Outside quotes, operators are handled first by check_operators(). A quote
** character is always copied into the token. It closes the current quoted
** section if it matches the opening quote; otherwise it opens one only when
** ft_strchr() finds a matching quote later in the input. Unquoted blanks
** (space or tab) are skipped and end the token. Every other character is
** copied as-is.
**
** Returns DELIMITE_TOKEN when the current token is complete, CONTINUE_TOKEN
** otherwise.
*/
static int	fill_token(t_token *t, char *input, int *i, int *t_i)
{
	static int	quote_state = 0;
	char		c;

	if (!quote_state
		&& check_operators(t, input, i, t_i) == DELIMITE_TOKEN)
		return (DELIMITE_TOKEN);
	c = input[*i];
	if ((c == '\'' && quote_state != IN_DQUOTES)
		|| (c == '"' && quote_state != IN_QUOTES))
	{
		t->content[(*t_i)++] = input[(*i)++];
		if (quote_state == c)
			quote_state = 0;
		else if (ft_strchr(&input[*i], c))
			quote_state = c;
		return (CONTINUE_TOKEN);
	}
	if (!quote_state && (c == ' ' || c == '\t'))
	{
		while (input[*i] == ' ' || input[*i] == '\t')
			(*i)++;
		return (DELIMITE_TOKEN);
	}
	t->content[(*t_i)++] = input[(*i)++];
	return (CONTINUE_TOKEN);
}
/*
** Recognizes the operators |, <, <<, > and >> at position *i.
**
** t:     token being filled; receives the operator text and its id.
** input: NUL-terminated command line.
** i:     in/out read position in input.
** t_i:   in/out write position in t->content.
**
** If the character at *i is not an operator character, nothing is consumed
** and CONTINUE_TOKEN is returned. If it is one but the current token already
** holds characters, nothing is consumed and DELIMITE_TOKEN is returned so
** the caller can close that token first. Otherwise the operator (one
** character, or two for << and >>) is copied into the token, t->id is set to
** T_PIPE, T_LESS, T_DLESS, T_GREAT or T_DGREAT, and DELIMITE_TOKEN is
** returned.
*/
static int	check_operators(t_token *t, char *input, int *i, int *t_i)
{
	char	op;

	op = input[*i];
	if (op != '|' && op != '<' && op != '>')
		return (CONTINUE_TOKEN);
	if (*t_i != 0)
		return (DELIMITE_TOKEN);
	t->content[(*t_i)++] = input[(*i)++];
	if (op == '|')
		t->id = T_PIPE;
	else if (input[*i] != op)
	{
		if (op == '<')
			t->id = T_LESS;
		else
			t->id = T_GREAT;
	}
	else
	{
		t->content[(*t_i)++] = input[(*i)++];
		if (op == '<')
			t->id = T_DLESS;
		else
			t->id = T_DGREAT;
	}
	return (DELIMITE_TOKEN);
}
/*
** Allocates a token together with its content buffer.
**
** content_len: maximum number of characters the token may hold; the buffer
**              is content_len + 1 bytes.
**
** Both allocations go through ft_calloc (presumably zero-filled like
** calloc -- confirm in libft).
**
** Returns the new token, NULL if the token itself cannot be allocated, or
** the result of ft_retp_free(NULL, token, free) if the content buffer
** cannot be allocated.
*/
static t_token	*alloc_token(size_t content_len)
{
	t_token	*new_tok;

	new_tok = ft_calloc(1, sizeof(*new_tok));
	if (new_tok == NULL)
		return (NULL);
	new_tok->content = ft_calloc(content_len + 1, sizeof(char));
	if (new_tok->content != NULL)
		return (new_tok);
	return (ft_retp_free(NULL, new_tok, free));
}
/*
Status of each numbered rule from:
https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_03
1 - OK
2 - OK
3 - OK
4 - OK
5 - OK / partly not needed
6 - OK
7 - OK
8 - OK
9 - not needed (ignored)
10 - OK
*/
// Duplicate of ft_lstclear()
/* void *free_tokens(t_token *t)
{
void *tmp;
while (t)
{
if (t->content)
free (t->content);
tmp = t;
t = t->next;
free(tmp);
}
return (NULL);
} */