syntax analysis with simplified shell grammar

+ TODO : bug to fix in lexer.c
This commit is contained in:
LuckyLaszlo
2021-10-24 19:58:19 +02:00
parent 906074d2cb
commit 815cedb8ca
11 changed files with 400 additions and 18 deletions

View File

@@ -5,7 +5,7 @@ CC = clang
CFLAGS = -Wall -Wextra -Werror $(INCLUDES) -g
VPATH = $(DIR_SRCS)
DIR_SRCS = srcs srcs/builtins
DIR_SRCS = srcs srcs/builtins srcs/parsing
INCLUDES = -I$(HEADERS_D) -I$(LIBFT_D)
@@ -24,6 +24,8 @@ LIBFT = $(LIBFT_D)/libft.a
SRCS = main.c init.c free.c generic.c \
shell_loop.c \
lexing.c \
parsing.c \
valid_syntax.c valid_pipeline.c valid_command.c valid_io_redirect.c \
env.c exit.c echo.c
DIR_OBJS = builds

View File

@@ -6,7 +6,7 @@
/* By: lperrey <lperrey@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2021/10/08 02:59:58 by lperrey #+# #+# */
/* Updated: 2021/10/23 15:19:42 by lperrey ### ########.fr */
/* Updated: 2021/10/24 19:20:09 by lperrey ### ########.fr */
/* */
/* ************************************************************************** */
@@ -16,11 +16,17 @@
// Init
int init(t_all *c, char *envp[]);
// Shell loop
void shell_loop(t_all *c);
// Lexer
t_token *input_to_tokens(char *input);
// Shell loop
void shell_loop(t_all *c);
// Parser
t_cmd **parsing(t_token *token_list);
int valid_syntax(t_token *token_list);
int valid_token(t_token **token_list, enum e_token_id token_id);
int valid_command_separator(const t_token *token_list);
// Builtins
int builtin_env(int argc, char *argv[], t_all *c);

View File

@@ -6,14 +6,13 @@
/* By: lperrey <lperrey@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2021/10/08 02:35:52 by lperrey #+# #+# */
/* Updated: 2021/10/22 12:41:17 by lperrey ### ########.fr */
/* Updated: 2021/10/24 19:18:28 by lperrey ### ########.fr */
/* */
/* ************************************************************************** */
#ifndef MINISHELL_STRUCTS_H
# define MINISHELL_STRUCTS_H
enum e_token_id
{
T_TOKEN = 0,
@@ -22,11 +21,7 @@ enum e_token_id
T_PIPE = '|',
T_DLESS, //'<<'
T_DGREAT, //'>>'
T_WORD,
//T_NAME,
//T_NEWLINE,
//T_IO_NUMBER
//T_ASSIGNMENT_WORD
T_WORD
};
typedef struct s_token
@@ -36,8 +31,21 @@ typedef struct s_token
enum e_token_id id;
} t_token;
struct s_all;
typedef int (*t_builtin_ptr)(int,char **,struct s_all *);
/*
** Description of one command of a pipeline.
** NOTE(review): nothing visible in this commit fills or reads this struct yet
** (parsing() still returns a placeholder), so the per-field notes below are
** inferred from the field names only -- confirm once the executor exists.
*/
typedef struct s_cmd
{
// presumably the argument vector handed to the command -- TODO confirm
char **argv;
// presumably the pid of the child process running the command -- TODO confirm
pid_t pid;
// t_builtin_ptr is already a function-pointer type, so this field is a
// pointer TO a function pointer.
// NOTE(review): a plain `t_builtin_ptr` may have been intended -- confirm.
t_builtin_ptr *builtin_command;
// presumably the descriptors used as the command's input and output
// -- TODO confirm
int fd_in;
int fd_out;
} t_cmd;
typedef struct s_all
{
t_cmd **cmd_arr;
char **envp;
char *prompt_base;
char *prompt;

View File

@@ -1,3 +1,5 @@
https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_10_02
/* -------------------------------------------------------
The grammar symbols
------------------------------------------------------- */
@@ -8,6 +10,49 @@
%token DGREAT // '>>'
%token PIPE // '|'
/* -------------------------------------------------------
The Simplified Grammar
------------------------------------------------------- */
%start pipeline
%%
pipeline : command
| pipeline '|' command
;
command : cmd_prefix cmd_name cmd_suffix
| cmd_prefix cmd_name
| cmd_name cmd_suffix
| cmd_name
;
cmd_name : WORD // Apply rule 7a
;
cmd_prefix : io_redirect
| cmd_prefix io_redirect
;
cmd_suffix : io_redirect
| cmd_suffix io_redirect
| WORD
| cmd_suffix WORD
;
io_redirect : io_file
| io_here
;
io_file : '<' filename
| LESSAND filename
| '>' filename
| GREATAND filename
| DGREAT filename
| LESSGREAT filename
| CLOBBER filename
;
filename : WORD // Apply rule 2
;
io_here : DLESS here_end
;
here_end : WORD // Apply rule 3
;
/* -------------------------------------------------------
The Grammar
------------------------------------------------------- */

View File

@@ -6,7 +6,7 @@
/* By: lperrey <lperrey@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2021/10/19 08:38:55 by lperrey #+# #+# */
/* Updated: 2021/10/19 20:11:31 by lperrey ### ########.fr */
/* Updated: 2021/10/24 19:53:40 by lperrey ### ########.fr */
/* */
/* ************************************************************************** */
@@ -43,6 +43,8 @@ t_token *input_to_tokens(char *input)
return (t_head);
}
// TODO : Fix final space saved after a pipe like in "cmd | "
// "cmd | " should behave like "cmd |"
static int tokenize_input(t_token *t, char *input, size_t input_len)
{
int i;
@@ -63,8 +65,10 @@ static int tokenize_input(t_token *t, char *input, size_t input_len)
t_i = 0;
}
}
if (!t->id)
if (!t->id && t_i) // Fix parser syntax, but last elem must still be free
t->id = T_WORD;
/* if (!t->id)
t->id = T_WORD; */
return (1);
}

101
srcs/parsing/parsing.c Normal file
View File

@@ -0,0 +1,101 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* parsing.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: lperrey <lperrey@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2021/10/24 10:52:40 by lperrey #+# #+# */
/* Updated: 2021/10/24 19:19:08 by lperrey ### ########.fr */
/* */
/* ************************************************************************** */
#include "minishell.h"
// What is a binary tree good for here? I no longer see the point.
/* typedef struct s_binary_tree
{
char *content;
struct s_binary_tree *sub;
struct s_binary_tree *sibling;
enum e_token_id id;
} t_binary_tree; */
/*
** Parser entry point: checks that token_list forms a syntactically valid
** pipeline.
**
** Work in progress -- only the syntax check is implemented. Remaining steps:
**   - count the pipes to determine the number of commands
**   - allocate and fill the t_cmd structures
**   - 2.9.1 - 2) expansion
**   - 2.9.1 - 3) redirection
**
** Returns NULL when valid_syntax() rejects the tokens. Otherwise returns the
** temporary non-NULL marker (t_cmd **)1: callers may test it for truthiness
** but must NOT dereference it until the real command array is built.
*/
t_cmd	**parsing(t_token *token_list)
{
	if (valid_syntax(token_list))
		return ((t_cmd **)1);
	return (NULL);
}
/* -------------------------------------------------------
The grammar symbols
------------------------------------------------------- */
/*
%token WORD
%token LESS // '<'
%token GREAT // '>'
%token DLESS // '<<'
%token DGREAT // '>>'
%token PIPE // '|'
*/
/* -------------------------------------------------------
The Simplified Grammar
------------------------------------------------------- */
/*
%start pipeline
%%
pipeline : command
| pipeline '|' command
;
command : cmd_prefix cmd_name cmd_suffix
| cmd_prefix cmd_name
| cmd_name cmd_suffix
| cmd_name
;
cmd_name : WORD // Apply rule 7a
;
cmd_prefix : io_redirect
| cmd_prefix io_redirect
;
cmd_suffix : io_redirect
| cmd_suffix io_redirect
| WORD
| cmd_suffix WORD
;
io_redirect : io_file
| io_here
;
io_file : '<' filename
| '>' filename
| DGREAT filename
;
filename : WORD // Apply rule 2
;
io_here : DLESS here_end
;
here_end : WORD // Apply rule 3
;
*/

View File

@@ -0,0 +1,95 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* valid_command.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: lperrey <lperrey@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2021/10/24 18:52:05 by lperrey #+# #+# */
/* Updated: 2021/10/24 19:21:28 by lperrey ### ########.fr */
/* */
/* ************************************************************************** */
#include "minishell.h"
int valid_io_redirect(t_token **token_list);
static int valid_command_rule1(t_token **token_list);
static int valid_command_rule2(t_token **token_list);
static int valid_command_rule3(t_token **token_list);
static int valid_command_rule4(t_token **token_list);
/*
** command : cmd_prefix cmd_name cmd_suffix
**         | cmd_prefix cmd_name
**         | cmd_name cmd_suffix
**         | cmd_name
**
** Tries the four alternatives in that order. *token_list is rewound to the
** first token of the command before each attempt, because a failed rule may
** have advanced it.
**
** Returns 1 as soon as one rule matches (*token_list is left where that rule
** stopped), 0 when none matches (*token_list is left where the last rule
** stopped).
*/
int	valid_command(t_token **token_list)
{
	static int	(*const rules[])(t_token **) = {
		valid_command_rule1,
		valid_command_rule2,
		valid_command_rule3,
		valid_command_rule4
	};
	t_token		*first;
	size_t		i;

	first = *token_list;
	i = 0;
	while (i < sizeof(rules) / sizeof(rules[0]))
	{
		*token_list = first;
		if (rules[i](token_list))
			return (1);
		i++;
	}
	return (0);
}
/*
** command : cmd_prefix cmd_name cmd_suffix
**
**   cmd_prefix : one or more io_redirect
**   cmd_name   : WORD
**   cmd_suffix : one or more WORD / io_redirect, in any order
**
** The three parts are matched strictly one after the other, and the command
** must end on a separator (end of list or '|').
**
** The previous version nested the name/suffix matching inside the prefix
** loop. When the suffix stopped on a malformed redirection, the outer loop
** resumed prefix matching from wherever the cursor had been left, so input
** such as `< in cmd > < in2 cmd2 arg` was accepted.
**
** Returns 1 on a match, with *token_list on the separator. Returns 0
** otherwise; *token_list is then unspecified and the caller rewinds it.
*/
static int	valid_command_rule1(t_token **token_list)
{
	if (!valid_io_redirect(token_list))
		return (0);
	while (valid_io_redirect(token_list))
		continue ;
	if (!valid_token(token_list, T_WORD))
		return (0);
	if (!valid_token(token_list, T_WORD) && !valid_io_redirect(token_list))
		return (0);
	while (valid_token(token_list, T_WORD) || valid_io_redirect(token_list))
		continue ;
	return (valid_command_separator(*token_list));
}
/*
** command : cmd_prefix cmd_name
**
** Each pass of the loop consumes one io_redirect, then a WORD if one follows.
** The rule succeeds as soon as such a WORD is directly followed by a
** separator (end of list or '|').
**
** Returns 1 on a match, with *token_list on the separator. Returns 0
** otherwise; *token_list may have been advanced and the caller rewinds it.
*/
static int	valid_command_rule2(t_token **token_list)
{
	while (valid_io_redirect(token_list))
	{
		if (!valid_token(token_list, T_WORD))
			continue ;
		if (valid_command_separator(*token_list))
			return (1);
	}
	return (0);
}
/*
** command : cmd_name cmd_suffix
**
** Consumes the command name (a WORD), then suffix elements (WORD or
** io_redirect) one at a time. It stops at the first element that is directly
** followed by a separator (end of list or '|').
**
** Returns 1 on a match, with *token_list on the separator. Returns 0 when
** the name is missing, when no suffix element follows, or when the suffix
** ends on something other than a separator.
*/
static int	valid_command_rule3(t_token **token_list)
{
	int	at_separator;

	if (!valid_token(token_list, T_WORD))
		return (0);
	at_separator = 0;
	while (!at_separator && (valid_token(token_list, T_WORD)
			|| valid_io_redirect(token_list)))
		at_separator = valid_command_separator(*token_list);
	return (at_separator);
}
/*
** command : cmd_name
**
** A single WORD directly followed by a separator (end of list or '|').
** Returns 1 on a match, 0 otherwise. The WORD stays consumed even when the
** separator check fails.
*/
static int	valid_command_rule4(t_token **token_list)
{
	return (valid_token(token_list, T_WORD)
		&& valid_command_separator(*token_list));
}

View File

@@ -0,0 +1,44 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* valid_io_redirect.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: lperrey <lperrey@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2021/10/24 18:52:42 by lperrey #+# #+# */
/* Updated: 2021/10/24 19:21:35 by lperrey ### ########.fr */
/* */
/* ************************************************************************** */
#include "minishell.h"
static int valid_io_file(t_token **token_list);
static int valid_io_here(t_token **token_list);
/*
** io_redirect : io_file
**             | io_here
**
** Tries both alternatives from the SAME starting token. The cursor is rewound
** between the two attempts and after a total failure, in the same way
** valid_command() rewinds between its rules.
**
** Without the rewind, a failed io_file that had already consumed its operator
** let io_here match from the following token, so `> << eof` was accepted as
** one valid redirection.
**
** Returns 1 with *token_list moved past the redirection, or 0 with
** *token_list unchanged.
*/
int	valid_io_redirect(t_token **token_list)
{
	t_token	*start;

	start = *token_list;
	if (valid_io_file(token_list))
		return (1);
	*token_list = start;
	if (valid_io_here(token_list))
		return (1);
	*token_list = start;
	return (0);
}
/*
** io_file : '<'    filename
**         | '>'    filename
**         | DGREAT filename
**
** Matches a redirection operator immediately followed by a WORD.
**
** Returns 1 with *token_list moved past both tokens. Returns 0 with
** *token_list restored to its value on entry. The restore matters when the
** operator is present but no WORD follows: the operator used to stay
** consumed, which let callers keep parsing after a dangling '<' or '>'.
*/
static int	valid_io_file(t_token **token_list)
{
	t_token	*start;

	start = *token_list;
	if ((valid_token(token_list, '<') || valid_token(token_list, '>')
			|| valid_token(token_list, T_DGREAT))
		&& valid_token(token_list, T_WORD))
		return (1);
	*token_list = start;
	return (0);
}
/*
** io_here : DLESS here_end
**
** Matches the '<<' operator immediately followed by a WORD (the here-document
** delimiter).
**
** Returns 1 with *token_list moved past both tokens. Returns 0 with
** *token_list restored to its value on entry, so that a '<<' with no
** delimiter after it is not left consumed.
*/
static int	valid_io_here(t_token **token_list)
{
	t_token	*start;

	start = *token_list;
	if (valid_token(token_list, T_DLESS) && valid_token(token_list, T_WORD))
		return (1);
	*token_list = start;
	return (0);
}

View File

@@ -0,0 +1,27 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* valid_pipeline.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: lperrey <lperrey@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2021/10/24 18:51:35 by lperrey #+# #+# */
/* Updated: 2021/10/24 19:35:16 by lperrey ### ########.fr */
/* */
/* ************************************************************************** */
#include "minishell.h"
int valid_command(t_token **token_list);
/*
** pipeline : command
**          | pipeline '|' command
**
** Validates commands one after the other. After each valid command:
**   - end of list: the whole pipeline is valid, return 1;
**   - a '|' that has a token after it: step over it and validate the next
**     command;
**   - anything else, including a trailing '|': return 0 with *token_list
**     left on the offending token.
** Also returns 0 when a command itself is invalid.
*/
int	valid_pipeline(t_token **token_list)
{
	t_token	*current;

	while (valid_command(token_list))
	{
		current = *token_list;
		if (current == NULL)
			return (1);
		if (current->id != '|' || current->next == NULL)
			return (0);
		*token_list = current->next;
	}
	return (0);
}

View File

@@ -0,0 +1,44 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* valid_syntax.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: lperrey <lperrey@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2021/10/24 13:01:10 by lperrey #+# #+# */
/* Updated: 2021/10/24 19:40:23 by lperrey ### ########.fr */
/* */
/* ************************************************************************** */
#include "minishell.h"
int valid_pipeline(t_token **token_list);
/*
** Checks that token_list is a syntactically valid pipeline.
**
** Returns 1 when it is. Otherwise prints a syntax error on fd 2 naming the
** token on which validation stopped, then returns 0.
**
** valid_pipeline() works on the local copy of token_list, so on failure the
** copy points at the offending token. That pointer is NULL when the list is
** empty; it used to be dereferenced unconditionally. "newline" is now
** reported in that case (and when the token has no content), as bash does
** at end of input.
*/
int	valid_syntax(t_token *token_list)
{
	char	*unexpected;

	if (valid_pipeline(&token_list))
		return (1);
	unexpected = "newline";
	if (token_list != NULL && token_list->content != NULL)
		unexpected = token_list->content;
	ft_putstr_fd("minishell: syntax error near unexpected token \'", 2);
	ft_putstr_fd(unexpected, 2);
	ft_putstr_fd("\'\n", 2);
	return (0);
}
/*
** Consumes the current token if its id is token_id.
**
** Returns 1 and advances *token_list to the next token on a match. Returns 0
** and leaves *token_list untouched when the list is exhausted or the id
** differs.
*/
int	valid_token(t_token **token_list, enum e_token_id token_id)
{
	t_token	*current;

	current = *token_list;
	if (current == NULL || current->id != token_id)
		return (0);
	*token_list = current->next;
	return (1);
}
/*
** Tells whether token_list sits on something that ends a command: the end of
** the list (NULL) or a '|' token. Nothing is consumed.
** Returns 1 if so, 0 otherwise.
*/
int	valid_command_separator(const t_token *token_list)
{
	return (token_list == NULL || token_list->id == '|');
}

View File

@@ -6,7 +6,7 @@
/* By: lperrey <lperrey@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2021/10/04 05:59:26 by lperrey #+# #+# */
/* Updated: 2021/10/22 10:48:07 by lperrey ### ########.fr */
/* Updated: 2021/10/24 19:17:03 by lperrey ### ########.fr */
/* */
/* ************************************************************************** */
@@ -28,17 +28,23 @@ void shell_loop(t_all *c)
{
add_history(line_input);
c->token_list = input_to_tokens(line_input);
c->token_list = input_to_tokens(line_input);
// EXEC_PIPES_AND_CO()
// temp placeholder
if (!ft_strncmp(c->token_list->content, "env", 4))
if (ft_strncmp(c->token_list->content, "env", 4) == 0)
builtin_env(0, NULL, c);
else if (!ft_strncmp(c->token_list->content, "exit", 5))
else if (ft_strncmp(c->token_list->content, "exit", 5) == 0)
builtin_exit(0, NULL, c);
else if (!ft_strncmp(c->token_list->content, "echo", 5))
else if (ft_strncmp(c->token_list->content, "echo", 5) == 0)
builtin_echo(ft_lstsize((t_list *)c->token_list) + 1, tokens_list_to_argv(c->token_list), c);
else
{
if (parsing(c->token_list))
ft_putstr_fd("Syntax OK:\n-----------\n", 1);
else
ft_putstr_fd("Syntax KO:\n-----------\n", 1);
ft_putstr_fd("TOKENS LIST :\n-----------\n", 1);
ft_lstprint((t_list *)c->token_list, 1);
ft_lstclear((t_list **)&c->token_list, free);