Wip chunked decoding

+ Need to test normal body parsing
+ path_is_valid() renamed to eval_file_type()
+ replaced atoi() with strtol()/strtoul()
This commit is contained in:
LuckyLaszlo
2022-08-12 05:50:00 +02:00
parent ab0bc2c4c0
commit 400efbe720
18 changed files with 185 additions and 82 deletions

View File

@@ -1,12 +1,16 @@
IN 42 SUBJECT AND/OR PRIORITY :
- CGI
- chunked request (response not mandatory it seems)
- Need to test normal body parsing
- Ecrire des tests !
- handle redirection (Works, but weird behavior; needs deeper testing)
- upload files with config "upload_dir"
- _determine_location() review (New version to complete and test)
- replace atoi() with a better function to avoid overflow
like strtol : https://www32.cplusplus.com/reference/cstdlib/strtol/
- replace std::string::npos with macro NPOS ?
-----------------------------
Si ce n'est pas deja fait :
- dans config, check erreur si port > 16bits
(peut-être check si ip > 32bits)
-----------------------------
- gerer le champ "Accept" du client
- gerer les ".." dans un URL (verifier que l'on ne sort pas du dossier "root")

View File

@@ -8,6 +8,7 @@
Client::Client()
: status(0),
header_complete(false),
body_complete(false),
request_complete(false),
read_body_size(0),
assigned_server(NULL),
@@ -23,6 +24,7 @@ Client::Client()
Client::Client(int afd, listen_socket *lsocket, std::string aport, std::string aip)
: status(0),
header_complete(false),
body_complete(false),
request_complete(false),
read_body_size(0),
assigned_server(NULL),
@@ -82,8 +84,9 @@ void Client::parse_request(std::vector<ServerConfig> &servers)
std::map<std::string, std::string> headers;
std::string body;
// DEBUG
// std::cout << "\nREQUEST ____________\n" << raw_request << "\n_____________\n";
if (raw_request.find(CRLF CRLF) == NPOS)
return ;
header_complete = true;
clear_request(); // not mandatory
_parse_request_line();
@@ -97,6 +100,7 @@ void Client::parse_request(std::vector<ServerConfig> &servers)
return;
_parse_port_hostname(this->get_rq_headers("Host"));
std::cerr << get_rq_method_str() << " " << get_rq_uri() << " " << get_rq_version() << "\n"; // DEBUG
/* dont clear raw_request, we need it for future reparsing of body
see call of parse_request() in _read_request() */
// raw_request.clear();
@@ -104,8 +108,89 @@ void Client::parse_request(std::vector<ServerConfig> &servers)
void Client::parse_request_body()
{
// TODO: check error and adjust status
_request.body = ::parse_http_body(raw_request);
size_t pos;
pos = raw_request.find(CRLF CRLF);
if (pos == NPOS)
{
std::cerr << "parse_request_body() bad call, header incomplete\n";
return;
}
pos += CRLF_SIZE*2;
// Chunked decoding WIP. Dont work.
if (!get_rq_headers("Transfer-Encoding").empty()
&& get_rq_headers("Transfer-Encoding") == "chunked")
{
size_t chunk_size = 1;
size_t chunk_field_end = 0;
char *endptr = NULL;
char *endptr_copy = NULL;
/* TODO: verify if last chunk in raw_request (to avoid multiples complete parsing)
but how ? with "raw_request.rfind("0" CRLF CRLF)", there no confirmation
that we have found the last last-chunk OR just some data */
_request.body = raw_request.substr(pos);
std::cerr << "______Chunked\n" << _request.body << "\n______\n";
pos = 0;
while (chunk_size != 0)
{
if (pos > _request.body.size())
{
std::cerr << "parse_request_body(), pos > size()\n";
// status = 400;
return;
}
if (pos == _request.body.size())
{
std::cerr << "parse_request_body(), will reread till last chunk\n";
return;
}
endptr_copy = endptr;
chunk_size = std::strtoul(&_request.body[pos], &endptr, 16);
if (chunk_size == LONG_MAX && errno == ERANGE)
status = 413;
if (endptr == endptr_copy)
{
std::cerr << "parse_request_body(), no conversion possible\n";
return;
}
chunk_field_end = _request.body.find(CRLF, pos);
if (chunk_field_end == NPOS)
{
std::cerr << "parse_request_body(), chunk_field no CRLF\n";
// status = 400;
return;
}
chunk_field_end += CRLF_SIZE;
_request.body.erase(pos, chunk_field_end);
pos += chunk_size + CRLF_SIZE;
}
_request.headers.erase("Transfer-Encoding");
body_complete = true;
}
else
{
if (raw_request.size() - pos >= std::strtoul(get_rq_headers("Content-Length").c_str(), NULL, 10))
{
_request.body = raw_request.substr(pos);
body_complete = true;
}
/* Should be equivalent */
// _request.body = raw_request.substr(pos);
// if (_request.body.size() >= std::strtoul(get_rq_headers("Content-Length").c_str(), NULL, 10))
// body_complete = true;
}
///////////////
// Body checks
if (_request.body.size() > assigned_server->client_body_limit)
status = 413;
}
@@ -132,6 +217,7 @@ void Client::clear()
{
clear_request();
header_complete = false;
body_complete = false;
request_complete = false;
read_body_size = 0;
assigned_server = NULL;
@@ -185,6 +271,7 @@ std::string Client::get_rq_port() const { return _request.port; }
std::string Client::get_rq_hostname() const { return _request.hostname; }
std::string Client::get_rq_script_path()const { return _request.script.path; }
std::string Client::get_rq_script_info()const { return _request.script.info; }
std::string Client::get_rq_headers(const std::string & key) const
{
std::map<std::string, std::string>::const_iterator it;
@@ -259,7 +346,7 @@ void Client::_parse_port_hostname(std::string host)
void Client::_check_request_errors()
{
//////////////////////
///////////////////////
// Request line checks
if (_request.method == UNKNOWN)
status = 501;
@@ -281,15 +368,21 @@ void Client::_check_request_errors()
response.append(CRLF CRLF);
}
if (status)
return;
/////////////////
//////////////////
// Headers checks
if (!this->get_rq_headers("Content-Length").empty()
&& ::atoi(this->get_rq_headers("Content-Length").c_str()) > (int)assigned_server->client_body_limit)
else if (!this->get_rq_headers("Content-Length").empty()
&& std::strtoul(this->get_rq_headers("Content-Length").c_str(), NULL, 10) > assigned_server->client_body_limit)
status = 413;
else if (!this->get_rq_headers("Transfer-Encoding").empty()
&& this->get_rq_headers("Transfer-Encoding") != "chunked" )
status = 501;
else if (!this->get_rq_headers("Content-Encoding").empty())
{
status = 415;
response.append("Accept-Encoding:"); // empty, no encoding accepted
response.append(CRLF);
}
return;
}
@@ -303,4 +396,3 @@ bool operator==(const Client& lhs, int fd)
{ return lhs.get_cl_fd() == fd; }
bool operator==(int fd, const Client& rhs)
{ return fd == rhs.get_cl_fd(); }

View File

@@ -46,8 +46,9 @@ class Client
std::string response;
unsigned int status;
bool header_complete;
bool body_complete;
bool request_complete;
size_t read_body_size;
size_t read_body_size; // unused for now
ServerConfig *assigned_server; // cant be const cause of error_pages.operator[]
const LocationConfig *assigned_location;

View File

@@ -1,14 +1,3 @@
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* ConfigParser.hpp :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: lperrey <lperrey@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2022/07/11 23:01:41 by me #+# #+# */
/* Updated: 2022/08/03 17:32:33 by lperrey ### ########.fr */
/* */
/* ************************************************************************** */
#ifndef CONFIGPARSER_HPP
# define CONFIGPARSER_HPP
@@ -22,7 +11,7 @@
# include <exception> // exception, what
# include <stdexcept> // runtime_error, invalid_argument
# include <string> // string
# include <cstdlib> // atoi (athough it's already cover by <string>)
# include <cstdlib> // strtol, stroul
# include <iostream> // cout, cin
# include <fstream> // ifstream
//# include <unistd.h> // access()

View File

@@ -25,7 +25,7 @@ public:
std::string root; // ./www/ or www work www/ and www work
// i do remove trailing / tho
unsigned int client_body_limit; // set to default max if none set
size_t client_body_limit; // set to default max if none set
std::vector<std::string> index;
std::map<int, std::string> error_pages;

View File

@@ -231,7 +231,7 @@ void ConfigParser::_set_server_values(ServerConfig *server, \
{
if (!::isNumeric(tmp_val[0]))
throw std::invalid_argument("client_body_limit not a number");
server->client_body_limit = atoi(tmp_val[0].c_str());
server->client_body_limit = std::strtoul(tmp_val[0].c_str(), NULL, 10);
}
else if (key == "index")
{
@@ -245,7 +245,7 @@ void ConfigParser::_set_server_values(ServerConfig *server, \
{
if (!(isNumeric_btw(400, 599, tmp_val[i])))
throw std::invalid_argument("invalid error code");
int status_code = atoi(tmp_val[i].c_str());
int status_code = std::strtoul(tmp_val[i].c_str(), NULL, 10);
if (server->error_pages.find(status_code) != server->error_pages.end())
throw std::invalid_argument("redeclaring error page");
server->error_pages[status_code] = path;
@@ -316,7 +316,7 @@ void ConfigParser::_set_location_values(LocationConfig *location, \
&& tmp_val[1].compare(0, 8, "https://"))
throw std::invalid_argument("bad redirect uri");
location->redirect_status = atoi(tmp_val[0].c_str());
location->redirect_status = std::strtoul(tmp_val[0].c_str(), NULL, 10);
location->redirect_uri = tmp_val[1];
}
else if (key == "upload_dir" && size == 1 && location->upload_dir == "")

View File

@@ -57,7 +57,7 @@ void ConfigParser::_post_processing(std::vector<ServerConfig> *servers)
// nothing to be done for cgi_ext, error_pages, redirect
// if (path_is_valid(it_l->root) == IS_DIR
// if (eval_file_type(it_l->root) == IS_DIR
// && it_l->path[it_l->path.size() - 1] != '/')
// it_l->path.push_back('/');
if (it_l->path[it_l->path.size() - 1] == '/'

View File

@@ -65,7 +65,7 @@ bool isNumeric_btw(int low, int high, std::string str)
if (std::isdigit(str[i]) == false)
return false;
}
int n = std::atoi(str.c_str());
int n = std::strtol(str.c_str(), NULL, 10);
if (n < low || n > high)
return false;
return true;
@@ -106,26 +106,23 @@ std::string http_methods_to_str(unsigned int methods)
# include <iostream>
// you could make this &path...
int path_is_valid(std::string path)
file_type eval_file_type(const std::string &path)
{
const char *tmp_path = path.c_str();
struct stat s;
if (stat(tmp_path, &s) == 0)
if (stat(tmp_path, &s) != -1)
{
if (S_ISREG(s.st_mode))
{
// std::cout << "is a file\n";
return (IS_FILE);
}
else if (S_ISDIR(s.st_mode))
{
// std::cout << "is a Dir\n";
return (IS_DIR);
}
}
// std::cout << "path is neither dir nor file\n";
else
{
std::perror("err stat()");
}
return (IS_OTHER);
}

View File

@@ -5,19 +5,32 @@
# include <vector>
# include <string>
# include <sstream>
# include <cstdlib> // atoi
# include <cstdlib> // strtol, strtoul
# include <climits> // LONG_MAX
# include <cerrno> // errno
# include <sys/stat.h> // stat()
# include <cctype> // tolower
# include <algorithm> // transform
# include <cstdio> // perror
# define CR "\r"
# define LF "\n"
# define CRLF CR LF
# define CRLF_SIZE 2
# define NPOS std::string::npos
# define IS_FILE 2
# define IS_DIR 1
# define IS_OTHER 0
/* Equivalent for end of http header size :
** std::string(CRLF CRLF).size();
** sizeof(CRLF CRLF) - 1;
** CRLF_SIZE*2
*/
enum file_type
{
IS_OTHER,
IS_FILE,
IS_DIR
};
enum http_method
{
@@ -46,7 +59,7 @@ std::string itos(int n);
std::string trim(std::string str, char c);
http_method str_to_http_method(std::string &str);
std::string http_methods_to_str(unsigned int methods);
int path_is_valid(std::string path);
file_type eval_file_type(const std::string &path);
void replace_all_substr(std::string &str, const std::string &ori_substr, const std::string &new_substr);
std::string str_tolower(std::string str);
void del_line_in_str(std::string * str, size_t pos, std::string delim);

View File

@@ -23,7 +23,7 @@
# include <algorithm> // find
# include <string> // string
# include <cstdio> // perror, remove
# include <cstdlib> // atoi (athough it's already cover by <string>)
# include <cstdlib> // strtol, strtoul
# include <dirent.h> // opendir()
# include "Client.hpp"

View File

@@ -146,7 +146,7 @@ void Webserv::_check_script_status(Client *client, std::string output)
if (pos != std::string::npos)
{
status_pos = pos + std::string("Status:").size();
client->status = atoi(output.c_str() + status_pos);
client->status = std::strtoul(output.c_str() + status_pos, NULL, 10);
::del_line_in_str(&output, pos, CRLF);
}
client->status = 200;

View File

@@ -76,7 +76,7 @@ void Webserv::_reopen_lsocket(std::vector<listen_socket>::iterator it)
// HUGO ADD END
try {
_bind(it->fd, std::atoi(it->port.c_str()), it->host);
_bind(it->fd, std::strtoul(it->port.c_str(), NULL, 10), it->host);
_listen(it->fd, 42); // 42 arbitrary
}
catch (const std::exception& e) {

View File

@@ -43,6 +43,7 @@
# define S405 "405 Method Not Allowed"
# define S408 "408 Request Timeout"
# define S413 "413 Content Too Large"
# define S415 "415 Unsupported Media Type"
# define S500 "500 Internal Server Error"
# define S501 "501 Not Implemented"

View File

@@ -48,7 +48,7 @@ void Webserv::init_virtual_servers(std::vector<ServerConfig>* servers)
//
// HUGO ADD END
_bind(new_socket.fd, std::atoi(it->port.c_str()), it->host);
_bind(new_socket.fd, std::strtoul(it->port.c_str(), NULL, 10), it->host);
_listen(new_socket.fd, 42); // 42 arbitrary
if (_epoll_update(new_socket.fd, EPOLLIN, EPOLL_CTL_ADD) == -1)

View File

@@ -1,7 +1,6 @@
#include "Webserv.hpp"
// TODO : path_is_valid() Macro for return value
void Webserv::_get(Client *client)
{
std::string path = client->get_rq_abs_path();
@@ -30,14 +29,14 @@ void Webserv::_get(Client *client)
// END TMP HUGO
// Index/Autoindex block
if (path_is_valid(path) == IS_DIR)
if (eval_file_type(path) == IS_DIR)
{
std::cout << "made it to Index/Autoindex\n";
if (path[path.size() - 1] != '/')
path.push_back('/');
for (size_t i = 0; i < client->assigned_location->index.size(); i++)
{
if (path_is_valid(path + client->assigned_location->index[i]) == 2)
if (eval_file_type(path + client->assigned_location->index[i]) == IS_FILE)
{
path.append(client->assigned_location->index[i]);
_get_file(client, path);

View File

@@ -25,12 +25,14 @@ void Webserv::_request(Client *client)
}
else if (ret == READ_COMPLETE)
{
if (client->body_complete)
std::cerr << "______BODY\n" << client->get_rq_body() << "\n______\n"; // DEBUG
_epoll_update(client->get_cl_fd(), EPOLLOUT, EPOLL_CTL_MOD);
client->request_complete = true;
}
}
int Webserv::_read_request(Client *client) // Messy, Need refactoring
int Webserv::_read_request(Client *client)
{
char buf[BUFSIZE];
ssize_t ret;
@@ -41,8 +43,6 @@ int Webserv::_read_request(Client *client) // Messy, Need refactoring
if (ret == -1)
{
std::perror("err recv()");
std::cerr << "client ptr =" << client << "\n"; // DEBUG
std::cerr << "client.fd =" << client->get_cl_fd() << "\n"; // DEBUG
return READ_CLOSE;
}
if (ret == 0)
@@ -50,41 +50,32 @@ int Webserv::_read_request(Client *client) // Messy, Need refactoring
std::cerr << "recv() read 0, then close client" << "\n"; // DEBUG
return READ_CLOSE;
}
client->raw_request.append(buf, ret);
if (!client->header_complete)
{
if (client->raw_request.find(CRLF CRLF) != std::string::npos)
client->parse_request(_servers);
if (client->status)
return READ_COMPLETE;
if (client->header_complete)
{
client->header_complete = true;
client->parse_request(_servers);
std::cerr << client->get_rq_method_str() << " " << client->get_rq_uri() << " " << client->get_rq_version() << "\n"; // DEBUG
if (client->status)
return READ_COMPLETE;
if (client->get_rq_headers("Content-Type").empty() && client->get_rq_headers("Content-Length").empty()) // No body case
return READ_COMPLETE;
if (client->get_rq_headers("Content-Type").empty()
&& client->get_rq_headers("Content-Length").empty()
&& client->get_rq_headers("Transfer-Encoding").empty())
return READ_COMPLETE; // No body case
}
else if (client->raw_request.size() > MAX_HEADER_SIZE)
{
// 413 or 400 ? 413 seems common among http server, but don't fit perfectly.
{ // 413 or 400 ? 413 seems common among http server, but don't fit perfectly.
client->status = 413;
return READ_COMPLETE;
}
}
else if (client->header_complete)
{
client->read_body_size += ret;
if (client->read_body_size > client->assigned_server->client_body_limit)
{
client->status = 413;
// client->read_body_size += ret; // Not accurate, part of body could have been read with headers, unused for now
client->parse_request_body();
if (client->status || client->body_complete)
return READ_COMPLETE;
}
if ((int)client->read_body_size >= ::atoi(client->get_rq_headers("Content-Length").c_str()))
{
client->parse_request_body();
return READ_COMPLETE;
}
}
return READ_IN_PROGRESS;

View File

@@ -20,8 +20,9 @@ void Webserv::run()
nfds = ::epoll_wait(_epfd, events, MAX_EVENTS, TIMEOUT);
if (nfds == -1)
{
int errno_copy = errno;
std::perror("err epoll_wait()");
if (errno == EINTR)
if (errno_copy == EINTR)
g_run = false;
else
throw std::runtime_error("Epoll wait");

15
test_chunk.txt Normal file
View File

@@ -0,0 +1,15 @@
https://en.wikipedia.org/wiki/Chunked_transfer_encoding#Example
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Transfer-Encoding
GET / HTTP/1.1
Host: localhost:4040
Accept: */*
Transfer-Encoding: chunked
7
Mozilla
9
Developer
7
Network
0