From 400efbe720c89ec8d7aa43a56454051a9e70ac37 Mon Sep 17 00:00:00 2001 From: LuckyLaszlo Date: Fri, 12 Aug 2022 05:50:00 +0200 Subject: [PATCH] Wip chunked decoding + Need to test normal body parsing + path_is_valid() renamed eval_file_type() + replaced atoi with strtol/strtoul --- memo.txt | 8 ++- srcs/Client.cpp | 118 +++++++++++++++++++++++++++++---- srcs/Client.hpp | 3 +- srcs/config/ConfigParser.hpp | 13 +--- srcs/config/ServerConfig.hpp | 2 +- srcs/config/parser.cpp | 6 +- srcs/config/postProcessing.cpp | 2 +- srcs/utils.cpp | 19 +++--- srcs/utils.hpp | 23 +++++-- srcs/webserv/Webserv.hpp | 2 +- srcs/webserv/cgi_script.cpp | 2 +- srcs/webserv/close.cpp | 2 +- srcs/webserv/http_status.hpp | 1 + srcs/webserv/init.cpp | 2 +- srcs/webserv/method_get.cpp | 5 +- srcs/webserv/request.cpp | 41 +++++------- srcs/webserv/run_loop.cpp | 3 +- test_chunk.txt | 15 +++++ 18 files changed, 185 insertions(+), 82 deletions(-) create mode 100644 test_chunk.txt diff --git a/memo.txt b/memo.txt index b6cd444..88c5890 100644 --- a/memo.txt +++ b/memo.txt @@ -1,12 +1,16 @@ IN 42 SUBJECT AND/OR PRIORITY : - CGI - chunked request (response not mandatory it seems) +- Need to test normal body parsing - Ecrire des tests ! - handle redirection (Work, but weird behavior need deeper test) - upload files with config "upload_dir" - _determine_location() review (New version to complete and test) -- replace atoi() with a better function to avoid overflow - like strtol : https://www32.cplusplus.com/reference/cstdlib/strtol/ +- replace std::string::npos with macro NPOS ? +----------------------------- +Si ce n'est pas deja fait : +- dans config, check erreur si port > 16bits +(peut-être check si ip > 32bits) ----------------------------- - gerer le champ "Accept" du client - gerer les ".." 
dans un URL (verifier que l'on ne sort pas du dossier "root") diff --git a/srcs/Client.cpp b/srcs/Client.cpp index 0b30006..e388257 100644 --- a/srcs/Client.cpp +++ b/srcs/Client.cpp @@ -8,6 +8,7 @@ Client::Client() : status(0), header_complete(false), + body_complete(false), request_complete(false), read_body_size(0), assigned_server(NULL), @@ -23,6 +24,7 @@ Client::Client() Client::Client(int afd, listen_socket *lsocket, std::string aport, std::string aip) : status(0), header_complete(false), + body_complete(false), request_complete(false), read_body_size(0), assigned_server(NULL), @@ -82,8 +84,9 @@ void Client::parse_request(std::vector &servers) std::map headers; std::string body; -// DEBUG -// std::cout << "\nREQUEST ____________\n" << raw_request << "\n_____________\n"; + if (raw_request.find(CRLF CRLF) == NPOS) + return ; + header_complete = true; clear_request(); // not mandatory _parse_request_line(); @@ -97,6 +100,7 @@ void Client::parse_request(std::vector &servers) return; _parse_port_hostname(this->get_rq_headers("Host")); + std::cerr << get_rq_method_str() << " " << get_rq_uri() << " " << get_rq_version() << "\n"; // DEBUG /* dont clear raw_request, we need it for future reparsing of body see call of parse_request() in _read_request() */ // raw_request.clear(); @@ -104,8 +108,89 @@ void Client::parse_request(std::vector &servers) void Client::parse_request_body() { - // TODO: check error and adjust status - _request.body = ::parse_http_body(raw_request); + size_t pos; + pos = raw_request.find(CRLF CRLF); + if (pos == NPOS) + { + std::cerr << "parse_request_body() bad call, header incomplete\n"; + return; + } + pos += CRLF_SIZE*2; + + // Chunked decoding WIP. Dont work. 
+ if (!get_rq_headers("Transfer-Encoding").empty() + && get_rq_headers("Transfer-Encoding") == "chunked") + { + size_t chunk_size = 1; + size_t chunk_field_end = 0; + char *endptr = NULL; + char *endptr_copy = NULL; + /* TODO: verify if last chunk in raw_request (to avoid multiples complete parsing) + but how ? with "raw_request.rfind("0" CRLF CRLF)", there no confirmation + that we have found the last last-chunk OR just some data */ + + _request.body = raw_request.substr(pos); + + std::cerr << "______Chunked\n" << _request.body << "\n______\n"; + pos = 0; + while (chunk_size != 0) + { + if (pos > _request.body.size()) + { + std::cerr << "parse_request_body(), pos > size()\n"; + // status = 400; + return; + } + + if (pos == _request.body.size()) + { + std::cerr << "parse_request_body(), will reread till last chunk\n"; + return; + } + + endptr_copy = endptr; + chunk_size = std::strtoul(&_request.body[pos], &endptr, 16); + if (chunk_size == LONG_MAX && errno == ERANGE) + status = 413; + if (endptr == endptr_copy) + { + std::cerr << "parse_request_body(), no conversion possible\n"; + return; + } + + + chunk_field_end = _request.body.find(CRLF, pos); + if (chunk_field_end == NPOS) + { + std::cerr << "parse_request_body(), chunk_field no CRLF\n"; + // status = 400; + return; + } + + chunk_field_end += CRLF_SIZE; + _request.body.erase(pos, chunk_field_end); + pos += chunk_size + CRLF_SIZE; + } + + _request.headers.erase("Transfer-Encoding"); + body_complete = true; + } + else + { + if (raw_request.size() - pos >= std::strtoul(get_rq_headers("Content-Length").c_str(), NULL, 10)) + { + _request.body = raw_request.substr(pos); + body_complete = true; + } + + /* Should be equivalent */ + // _request.body = raw_request.substr(pos); + // if (_request.body.size() >= std::strtoul(get_rq_headers("Content-Length").c_str(), NULL, 10)) + // body_complete = true; + } + +/////////////// +// Body checks if (_request.body.size() > assigned_server->client_body_limit) status = 413; } @@ 
-132,6 +217,7 @@ void Client::clear() { clear_request(); header_complete = false; + body_complete = false; request_complete = false; read_body_size = 0; assigned_server = NULL; @@ -185,6 +271,7 @@ std::string Client::get_rq_port() const { return _request.port; } std::string Client::get_rq_hostname() const { return _request.hostname; } std::string Client::get_rq_script_path()const { return _request.script.path; } std::string Client::get_rq_script_info()const { return _request.script.info; } + std::string Client::get_rq_headers(const std::string & key) const { std::map::const_iterator it; @@ -259,7 +346,7 @@ void Client::_parse_port_hostname(std::string host) void Client::_check_request_errors() { -////////////////////// +/////////////////////// // Request line checks if (_request.method == UNKNOWN) status = 501; @@ -281,15 +368,21 @@ void Client::_check_request_errors() response.append(CRLF CRLF); } - if (status) - return; - -///////////////// +////////////////// // Headers checks - if (!this->get_rq_headers("Content-Length").empty() - && ::atoi(this->get_rq_headers("Content-Length").c_str()) > (int)assigned_server->client_body_limit) + else if (!this->get_rq_headers("Content-Length").empty() + && std::strtoul(this->get_rq_headers("Content-Length").c_str(), NULL, 10) > assigned_server->client_body_limit) status = 413; - + else if (!this->get_rq_headers("Transfer-Encoding").empty() + && this->get_rq_headers("Transfer-Encoding") != "chunked" ) + status = 501; + else if (!this->get_rq_headers("Content-Encoding").empty()) + { + status = 415; + response.append("Accept-Encoding:"); // empty, no encoding accepted + response.append(CRLF); + } + return; } @@ -303,4 +396,3 @@ bool operator==(const Client& lhs, int fd) { return lhs.get_cl_fd() == fd; } bool operator==(int fd, const Client& rhs) { return fd == rhs.get_cl_fd(); } - diff --git a/srcs/Client.hpp b/srcs/Client.hpp index 325abc3..b87c84c 100644 --- a/srcs/Client.hpp +++ b/srcs/Client.hpp @@ -46,8 +46,9 @@ class 
Client std::string response; unsigned int status; bool header_complete; + bool body_complete; bool request_complete; - size_t read_body_size; + size_t read_body_size; // unused for now ServerConfig *assigned_server; // cant be const cause of error_pages.operator[] const LocationConfig *assigned_location; diff --git a/srcs/config/ConfigParser.hpp b/srcs/config/ConfigParser.hpp index 32197eb..4f0d406 100644 --- a/srcs/config/ConfigParser.hpp +++ b/srcs/config/ConfigParser.hpp @@ -1,14 +1,3 @@ -/* ************************************************************************** */ -/* */ -/* ::: :::::::: */ -/* ConfigParser.hpp :+: :+: :+: */ -/* +:+ +:+ +:+ */ -/* By: lperrey +#+ +:+ +#+ */ -/* +#+#+#+#+#+ +#+ */ -/* Created: 2022/07/11 23:01:41 by me #+# #+# */ -/* Updated: 2022/08/03 17:32:33 by lperrey ### ########.fr */ -/* */ -/* ************************************************************************** */ #ifndef CONFIGPARSER_HPP # define CONFIGPARSER_HPP @@ -22,7 +11,7 @@ # include // exception, what # include // runtime_error, invalid_argument # include // string -# include // atoi (athough it's already cover by ) +# include // strtol, stroul # include // cout, cin # include // ifstream //# include // access() diff --git a/srcs/config/ServerConfig.hpp b/srcs/config/ServerConfig.hpp index c0feecf..c2fbbb0 100644 --- a/srcs/config/ServerConfig.hpp +++ b/srcs/config/ServerConfig.hpp @@ -25,7 +25,7 @@ public: std::string root; // ./www/ or www work www/ and www work // i do remove trailing / tho - unsigned int client_body_limit; // set to default max if none set + size_t client_body_limit; // set to default max if none set std::vector index; std::map error_pages; diff --git a/srcs/config/parser.cpp b/srcs/config/parser.cpp index fe47799..6e6a914 100644 --- a/srcs/config/parser.cpp +++ b/srcs/config/parser.cpp @@ -231,7 +231,7 @@ void ConfigParser::_set_server_values(ServerConfig *server, \ { if (!::isNumeric(tmp_val[0])) throw std::invalid_argument("client_body_limit 
not a number"); - server->client_body_limit = atoi(tmp_val[0].c_str()); + server->client_body_limit = std::strtoul(tmp_val[0].c_str(), NULL, 10); } else if (key == "index") { @@ -245,7 +245,7 @@ void ConfigParser::_set_server_values(ServerConfig *server, \ { if (!(isNumeric_btw(400, 599, tmp_val[i]))) throw std::invalid_argument("invalid error code"); - int status_code = atoi(tmp_val[i].c_str()); + int status_code = std::strtoul(tmp_val[i].c_str(), NULL, 10); if (server->error_pages.find(status_code) != server->error_pages.end()) throw std::invalid_argument("redeclaring error page"); server->error_pages[status_code] = path; @@ -316,7 +316,7 @@ void ConfigParser::_set_location_values(LocationConfig *location, \ && tmp_val[1].compare(0, 8, "https://")) throw std::invalid_argument("bad redirect uri"); - location->redirect_status = atoi(tmp_val[0].c_str()); + location->redirect_status = std::strtoul(tmp_val[0].c_str(), NULL, 10); location->redirect_uri = tmp_val[1]; } else if (key == "upload_dir" && size == 1 && location->upload_dir == "") diff --git a/srcs/config/postProcessing.cpp b/srcs/config/postProcessing.cpp index bfc3a29..6ccfc2f 100644 --- a/srcs/config/postProcessing.cpp +++ b/srcs/config/postProcessing.cpp @@ -57,7 +57,7 @@ void ConfigParser::_post_processing(std::vector *servers) // nothing to be done for cgi_ext, error_pages, redirect -// if (path_is_valid(it_l->root) == IS_DIR +// if (eval_file_type(it_l->root) == IS_DIR // && it_l->path[it_l->path.size() - 1] != '/') // it_l->path.push_back('/'); if (it_l->path[it_l->path.size() - 1] == '/' diff --git a/srcs/utils.cpp b/srcs/utils.cpp index 1961183..1e1b359 100644 --- a/srcs/utils.cpp +++ b/srcs/utils.cpp @@ -65,7 +65,7 @@ bool isNumeric_btw(int low, int high, std::string str) if (std::isdigit(str[i]) == false) return false; } - int n = std::atoi(str.c_str()); + int n = std::strtol(str.c_str(), NULL, 10); if (n < low || n > high) return false; return true; @@ -106,26 +106,23 @@ std::string 
http_methods_to_str(unsigned int methods) # include -// you could make this &path... -int path_is_valid(std::string path) +file_type eval_file_type(const std::string &path) { const char *tmp_path = path.c_str(); struct stat s; - if (stat(tmp_path, &s) == 0) + if (stat(tmp_path, &s) != -1) { if (S_ISREG(s.st_mode)) - { -// std::cout << "is a file\n"; return (IS_FILE); - } else if (S_ISDIR(s.st_mode)) - { -// std::cout << "is a Dir\n"; return (IS_DIR); - } } -// std::cout << "path is neither dir nor file\n"; + else + { + std::perror("err stat()"); + } + return (IS_OTHER); } diff --git a/srcs/utils.hpp b/srcs/utils.hpp index 8dfd258..7667c5e 100644 --- a/srcs/utils.hpp +++ b/srcs/utils.hpp @@ -5,19 +5,32 @@ # include # include # include -# include // atoi +# include // strtol, strtoul +# include // LONG_MAX +# include // errno # include // stat() # include // tolower # include // transform +# include // perror # define CR "\r" # define LF "\n" # define CRLF CR LF +# define CRLF_SIZE 2 +# define NPOS std::string::npos -# define IS_FILE 2 -# define IS_DIR 1 -# define IS_OTHER 0 +/* Equivalent for end of http header size : +** std::string(CRLF CRLF).size(); +** sizeof(CRLF CRLF) - 1; +** CRLF_SIZE*2 +*/ +enum file_type +{ + IS_OTHER, + IS_FILE, + IS_DIR +}; enum http_method { @@ -46,7 +59,7 @@ std::string itos(int n); std::string trim(std::string str, char c); http_method str_to_http_method(std::string &str); std::string http_methods_to_str(unsigned int methods); -int path_is_valid(std::string path); +file_type eval_file_type(const std::string &path); void replace_all_substr(std::string &str, const std::string &ori_substr, const std::string &new_substr); std::string str_tolower(std::string str); void del_line_in_str(std::string * str, size_t pos, std::string delim); diff --git a/srcs/webserv/Webserv.hpp b/srcs/webserv/Webserv.hpp index 71b8e21..b0c5339 100644 --- a/srcs/webserv/Webserv.hpp +++ b/srcs/webserv/Webserv.hpp @@ -23,7 +23,7 @@ # include // find # include // 
string # include // perror, remove -# include // atoi (athough it's already cover by ) +# include // strtol, strtoul # include // opendir() # include "Client.hpp" diff --git a/srcs/webserv/cgi_script.cpp b/srcs/webserv/cgi_script.cpp index be03cbe..7e237dd 100644 --- a/srcs/webserv/cgi_script.cpp +++ b/srcs/webserv/cgi_script.cpp @@ -146,7 +146,7 @@ void Webserv::_check_script_status(Client *client, std::string output) if (pos != std::string::npos) { status_pos = pos + std::string("Status:").size(); - client->status = atoi(output.c_str() + status_pos); + client->status = std::strtoul(output.c_str() + status_pos, NULL, 10); ::del_line_in_str(&output, pos, CRLF); } client->status = 200; diff --git a/srcs/webserv/close.cpp b/srcs/webserv/close.cpp index d021245..f71add7 100644 --- a/srcs/webserv/close.cpp +++ b/srcs/webserv/close.cpp @@ -76,7 +76,7 @@ void Webserv::_reopen_lsocket(std::vector::iterator it) // HUGO ADD END try { - _bind(it->fd, std::atoi(it->port.c_str()), it->host); + _bind(it->fd, std::strtoul(it->port.c_str(), NULL, 10), it->host); _listen(it->fd, 42); // 42 arbitrary } catch (const std::exception& e) { diff --git a/srcs/webserv/http_status.hpp b/srcs/webserv/http_status.hpp index b002129..f519ec7 100644 --- a/srcs/webserv/http_status.hpp +++ b/srcs/webserv/http_status.hpp @@ -43,6 +43,7 @@ # define S405 "405 Method Not Allowed" # define S408 "408 Request Timeout" # define S413 "413 Content Too Large" +# define S415 "415 Unsupported Media Type" # define S500 "500 Internal Server Error" # define S501 "501 Not Implemented" diff --git a/srcs/webserv/init.cpp b/srcs/webserv/init.cpp index 4f66cfd..d2ab890 100644 --- a/srcs/webserv/init.cpp +++ b/srcs/webserv/init.cpp @@ -48,7 +48,7 @@ void Webserv::init_virtual_servers(std::vector* servers) // // HUGO ADD END - _bind(new_socket.fd, std::atoi(it->port.c_str()), it->host); + _bind(new_socket.fd, std::strtoul(it->port.c_str(), NULL, 10), it->host); _listen(new_socket.fd, 42); // 42 arbitrary if 
(_epoll_update(new_socket.fd, EPOLLIN, EPOLL_CTL_ADD) == -1) diff --git a/srcs/webserv/method_get.cpp b/srcs/webserv/method_get.cpp index 12b5751..b4b9baa 100644 --- a/srcs/webserv/method_get.cpp +++ b/srcs/webserv/method_get.cpp @@ -1,7 +1,6 @@ #include "Webserv.hpp" -// TODO : path_is_valid() Macro for return value void Webserv::_get(Client *client) { std::string path = client->get_rq_abs_path(); @@ -30,14 +29,14 @@ void Webserv::_get(Client *client) // END TMP HUGO // Index/Autoindex block - if (path_is_valid(path) == IS_DIR) + if (eval_file_type(path) == IS_DIR) { std::cout << "made it to Index/Autoindex\n"; if (path[path.size() - 1] != '/') path.push_back('/'); for (size_t i = 0; i < client->assigned_location->index.size(); i++) { - if (path_is_valid(path + client->assigned_location->index[i]) == 2) + if (eval_file_type(path + client->assigned_location->index[i]) == IS_FILE) { path.append(client->assigned_location->index[i]); _get_file(client, path); diff --git a/srcs/webserv/request.cpp b/srcs/webserv/request.cpp index b70056c..24f2a80 100644 --- a/srcs/webserv/request.cpp +++ b/srcs/webserv/request.cpp @@ -25,12 +25,14 @@ void Webserv::_request(Client *client) } else if (ret == READ_COMPLETE) { + if (client->body_complete) + std::cerr << "______BODY\n" << client->get_rq_body() << "\n______\n"; // DEBUG _epoll_update(client->get_cl_fd(), EPOLLOUT, EPOLL_CTL_MOD); client->request_complete = true; } } -int Webserv::_read_request(Client *client) // Messy, Need refactoring +int Webserv::_read_request(Client *client) { char buf[BUFSIZE]; ssize_t ret; @@ -41,8 +43,6 @@ int Webserv::_read_request(Client *client) // Messy, Need refactoring if (ret == -1) { std::perror("err recv()"); - std::cerr << "client ptr =" << client << "\n"; // DEBUG - std::cerr << "client.fd =" << client->get_cl_fd() << "\n"; // DEBUG return READ_CLOSE; } if (ret == 0) @@ -50,41 +50,32 @@ int Webserv::_read_request(Client *client) // Messy, Need refactoring std::cerr << "recv() read 0, then 
close client" << "\n"; // DEBUG return READ_CLOSE; } - client->raw_request.append(buf, ret); + if (!client->header_complete) { - if (client->raw_request.find(CRLF CRLF) != std::string::npos) + client->parse_request(_servers); + if (client->status) + return READ_COMPLETE; + if (client->header_complete) { - client->header_complete = true; - client->parse_request(_servers); - std::cerr << client->get_rq_method_str() << " " << client->get_rq_uri() << " " << client->get_rq_version() << "\n"; // DEBUG - if (client->status) - return READ_COMPLETE; - - if (client->get_rq_headers("Content-Type").empty() && client->get_rq_headers("Content-Length").empty()) // No body case - return READ_COMPLETE; + if (client->get_rq_headers("Content-Type").empty() + && client->get_rq_headers("Content-Length").empty() + && client->get_rq_headers("Transfer-Encoding").empty()) + return READ_COMPLETE; // No body case } else if (client->raw_request.size() > MAX_HEADER_SIZE) - { - // 413 or 400 ? 413 seems common among http server, but don't fit perfectly. + { // 413 or 400 ? 413 seems common among http server, but don't fit perfectly. 
client->status = 413; return READ_COMPLETE; } } else if (client->header_complete) { - client->read_body_size += ret; - if (client->read_body_size > client->assigned_server->client_body_limit) - { - client->status = 413; + // client->read_body_size += ret; // Not accurate, part of body could have been read with headers, unused for now + client->parse_request_body(); + if (client->status || client->body_complete) return READ_COMPLETE; - } - if ((int)client->read_body_size >= ::atoi(client->get_rq_headers("Content-Length").c_str())) - { - client->parse_request_body(); - return READ_COMPLETE; - } } return READ_IN_PROGRESS; diff --git a/srcs/webserv/run_loop.cpp b/srcs/webserv/run_loop.cpp index 108a226..cf54e81 100644 --- a/srcs/webserv/run_loop.cpp +++ b/srcs/webserv/run_loop.cpp @@ -20,8 +20,9 @@ void Webserv::run() nfds = ::epoll_wait(_epfd, events, MAX_EVENTS, TIMEOUT); if (nfds == -1) { + int errno_copy = errno; std::perror("err epoll_wait()"); - if (errno == EINTR) + if (errno_copy == EINTR) g_run = false; else throw std::runtime_error("Epoll wait"); diff --git a/test_chunk.txt b/test_chunk.txt new file mode 100644 index 0000000..8c55d27 --- /dev/null +++ b/test_chunk.txt @@ -0,0 +1,15 @@ +https://en.wikipedia.org/wiki/Chunked_transfer_encoding#Example +https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Transfer-Encoding + +GET / HTTP/1.1 +Host: localhost:4040 +Accept: */* +Transfer-Encoding: chunked + +7 +Mozilla +9 +Developer +7 +Network +0