added multipart upload file,

it works, but needs some adjustments,
refactoring and testing
This commit is contained in:
LuckyLaszlo
2022-08-14 06:25:06 +02:00
parent b0949615c8
commit 84babec82b
19 changed files with 398 additions and 86 deletions

View File

@@ -5,7 +5,8 @@ CXX = clang++
CXXFLAGS = -Wall -Wextra #-Werror
CXXFLAGS += $(HEADERS_D:%=-I%)
CXXFLAGS += -std=c++98
CXXFLAGS += -g3
CXXFLAGS += -g
CXXFLAGS += -fno-limit-debug-info
CXXFLAGS += -MMD -MP #header dependencie
#CXXFLAGS += -O3
@@ -29,7 +30,7 @@ SRCS = main.cpp \
postProcessing.cpp \
utils.cpp \
cgi_script.cpp \
Client.cpp \
Client.cpp Client_multipart_body.cpp \
OBJS_D = builds
OBJS = $(SRCS:%.cpp=$(OBJS_D)/%.o)

View File

@@ -7,7 +7,7 @@ server {
listen 0.0.0.0:4040;
# client_body_limit asdfa;
# client_body_limit 400;
client_body_limit 3000000;
index index.html; # this is another comment
@@ -19,6 +19,19 @@ server {
autoindex on;
}
location /upload {
allow_methods POST;
autoindex on;
upload_dir ./www/user_files/; # TODO: append a '/' if there is none ?
# root doesn't matter if used only with POST and no CGI
}
location /the_dump {
allow_methods GET;
root ./www/user_files;
autoindex on;
}
location /redirect {
redirect 307 https://fr.wikipedia.org/wiki/Ketchup;
# redirect 307 https://www.youtube.com/watch?v=rG6b8gjMEkw;
@@ -50,13 +63,11 @@ server {
location /test/test_deeper/ {
# allow_methods
autoindex on;
root ./www/test/test_deeper/;
}
location /test/test_deeper/super_deep {
root ./www/test/test_deeper/super_deep/;
autoindex on;
}
# location /test/test_deeper/something.html {

View File

@@ -1,14 +1,15 @@
IN 42 SUBJECT AND/OR PRIORITY :
- CGI (TODO HUGO)
- chunked request (WIP, a bit difficult)
- chunked request (need testing)
- Need to test normal body parsing
- basic html upload page for testing request of web browser
- upload files with config "upload_dir"
- upload files testing and adjustments
- https://nginx.org/en/docs/http/ngx_http_core_module.html#client_max_body_size
Config en Ko (valeur * 2^10) serait plus commode qu'en octets
Et 0 comme valeur spéciale pour désactiver
- Ecrire des tests !
- handle redirection (Works, but weird behavior needs deeper testing)
- _determine_location() review (New version to complete and test)
-----------------------------
Si ce n'est pas deja fait :
- dans config, check erreur si port > 16bits

53
multipart_request.txt Normal file
View File

@@ -0,0 +1,53 @@
POST /upload HTTP/1.1
Host: localhost:4040
Connection: keep-alive
Content-Length: 364
Cache-Control: max-age=0
sec-ch-ua: "Chromium";v="104", " Not A;Brand";v="99", "Google Chrome";v="104"
sec-ch-ua-mobile: ?0
sec-ch-ua-platform: "Windows"
Upgrade-Insecure-Requests: 1
Origin: http://localhost:4040
Content-Type: multipart/form-data; boundary=----WebKitFormBoundaryeIV4xrEzThmNUcJf
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9
Sec-Fetch-Site: same-origin
Sec-Fetch-Mode: navigate
Sec-Fetch-User: ?1
Sec-Fetch-Dest: document
Referer: http://localhost:4040/upload_form.html
Accept-Encoding: gzip, deflate, br
Accept-Language: fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7
dnt: 1
sec-gpc: 1
------WebKitFormBoundaryeIV4xrEzThmNUcJf
Content-Disposition: form-data; name="upload_file"; filename=".gitignore"
Content-Type: text/plain
.DS_Store
Thumbs.db
*.o
*.d
*.swp
*.out
*.exe
*.stackdump
*.a
*.so
*.dSYM
.vscode
*.lnk
*.zip
builds
ubuntu_tester
ubuntu_cgi_tester
webserv
!**/webserv/
*.log
large.jpg
------WebKitFormBoundaryeIV4xrEzThmNUcJf--

View File

@@ -101,7 +101,7 @@ void Client::parse_request_headers(std::vector<ServerConfig> &servers)
_check_request_errors();
if (status)
return;
_parse_port_hostname(this->get_rq_headers("Host")); // use getter for headers because it works case insensitive
_parse_port_hostname(get_rq_headers("Host")); // use getter for headers because it works case insensitive
// DEBUG
// std::cerr << get_rq_method_str() << " " << get_rq_target() << " " << get_rq_version() << "\n";
@@ -121,14 +121,38 @@ void Client::parse_request_body()
{
std::cerr << "parse_request_body() bad call, header incomplete\n";
// QUESTION from hugo : don't we change the status here ?
// RESPONSE from luke : C'est vrai. Peut-être mettre un 500, c'etait plus du debug à la base.
// C'est seulement si on appelle la fonction au mauvais endroit, avant d'avoir un header complet, que ça arrive.
return;
}
pos += CRLF_SIZE*2;
// Chunked decoding WIP. Dont work.
if (!get_rq_headers("Transfer-Encoding").empty()
&& get_rq_headers("Transfer-Encoding") == "chunked")
{
// Chunked decoding WIP. How to test this ? dont know how to send chunks with telnet.
_parse_chunked_body(pos + CRLF_SIZE*2);
}
else if (raw_request.size() - pos >= std::strtoul(get_rq_headers("Content-Length").c_str(), NULL, 10))
{
if (get_rq_headers("Content-Type").find("multipart/form-data") != NPOS)
_parse_multipart_body(pos);
else
_request.body = raw_request.substr(pos + CRLF_SIZE*2);
body_complete = true;
}
// std::cerr << "Content-Length = " << std::strtoul(get_rq_headers("Content-Length").c_str(), NULL, 10) << "\n";
// std::cerr << "raw_request.size() - pos = " << raw_request.size() - pos << "\n";
// _request.body = raw_request.substr(pos);
// std::cerr << "_request.body.size() = " << _request.body.size() << "\n";
///////////////
// Body checks
if (_request.body.size() > assigned_server->client_body_limit)
status = 413; // HTTP Client Errors
}
void Client::_parse_chunked_body(size_t pos)
{
size_t chunk_size = 1;
size_t chunk_field_end = 0;
char *endptr = NULL;
@@ -183,32 +207,10 @@ void Client::parse_request_body()
_request.headers.erase("Transfer-Encoding");
body_complete = true;
}
else
{
std::cerr << "Content-Length = " << std::strtoul(get_rq_headers("Content-Length").c_str(), NULL, 10) << "\n";
std::cerr << "raw_request.size() - pos = " << raw_request.size() - pos << "\n";
_request.body = raw_request.substr(pos);
std::cerr << "_request.body.size() = " << _request.body.size() << "\n";
if (raw_request.size() - pos >= std::strtoul(get_rq_headers("Content-Length").c_str(), NULL, 10))
{
_request.body = raw_request.substr(pos);
body_complete = true;
}
/* Should be equivalent */
// _request.body = raw_request.substr(pos);
// if (_request.body.size() >= std::strtoul(get_rq_headers("Content-Length").c_str(), NULL, 10))
// body_complete = true;
}
///////////////
// Body checks
if (_request.body.size() > assigned_server->client_body_limit)
status = 413; // HTTP Client Errors
}
// TODO HUGO : faire la fonction, mdr.
void Client::fill_script_path(const std::string &path, size_t pos)
{
@@ -254,6 +256,7 @@ void Client::clear_request()
_request.version.clear();
_request.headers.clear();
_request.body.clear();
_request.multi_bodys.clear();
_request.abs_path.clear();
_request.query.clear();
_request.port.clear();
@@ -376,7 +379,7 @@ void Client::_parse_request_fields()
// delete first line
pos = headers.find(CRLF);
if (pos != NPOS)
headers.erase(0, pos + std::string(CRLF).size());
headers.erase(0, pos + CRLF_SIZE);
// delete body part
pos = headers.find(CRLF CRLF);
if (pos != NPOS)
@@ -415,6 +418,9 @@ void Client::_parse_port_hostname(std::string host)
void Client::_check_request_errors()
{
std::cerr << "Content-Length=" << get_rq_headers("Content-Length") << "\n";
std::cerr << "strtoul=" << std::strtoul(get_rq_headers("Content-Length").c_str(), NULL, 10) << "\n";
std::cerr << "client_body_limit=" << assigned_server->client_body_limit << "\n";
///////////////////////
// Request line checks
if (_request.method == UNKNOWN)
@@ -439,13 +445,13 @@ void Client::_check_request_errors()
//////////////////
// Headers checks
else if (!this->get_rq_headers("Content-Length").empty()
&& std::strtoul(this->get_rq_headers("Content-Length").c_str(), NULL, 10) > assigned_server->client_body_limit)
else if (!get_rq_headers("Content-Length").empty()
&& std::strtoul(get_rq_headers("Content-Length").c_str(), NULL, 10) > assigned_server->client_body_limit)
status = 413;
else if (!this->get_rq_headers("Transfer-Encoding").empty()
&& this->get_rq_headers("Transfer-Encoding") != "chunked" )
else if (!get_rq_headers("Transfer-Encoding").empty()
&& get_rq_headers("Transfer-Encoding") != "chunked" )
status = 501;
else if (!this->get_rq_headers("Content-Encoding").empty())
else if (!get_rq_headers("Content-Encoding").empty())
{
status = 415;
response.append("Accept-Encoding:"); // empty, no encoding accepted

View File

@@ -18,6 +18,12 @@ struct Script
std::string info;
};
// One part of a multipart request body.
// Instances are stored in Request::multi_bodys.
struct MultipartBody
{
// Header fields that belong to this part only (not the request headers).
std::map<std::string, std::string> headers;
// Content of this part.
std::string body;
};
struct Request
{
http_method method;
@@ -27,6 +33,7 @@ struct Request
std::string version;
std::map<std::string, std::string> headers;
std::string body;
std::vector<MultipartBody> multi_bodys;
std::string port;
std::string hostname;
struct Script script;
@@ -71,6 +78,9 @@ class Client
std::string get_rq_script_info() const;
std::string get_rq_headers(const std::string & key) const;
const std::vector<MultipartBody> &get_rq_multi_bodys() const;
const std::string get_rq_multi_bodys_headers(const std::string & key, std::vector<MultipartBody>::const_iterator body_it) const;
void parse_request_headers(std::vector<ServerConfig> &servers);
void parse_request_body();
void clear();
@@ -91,7 +101,8 @@ class Client
void _parse_request_fields();
void _parse_request_target( std::string target );
void _parse_port_hostname(std::string host);
void _parse_chunked_body(size_t pos);
void _parse_multipart_body(size_t pos);
void _check_request_errors();
};

View File

@@ -0,0 +1,103 @@
#include "Client.hpp"
/*
** Read-only access to the parts parsed from a multipart request body.
** The vector is empty when no multipart body has been parsed.
*/
const std::vector<MultipartBody> &Client::get_rq_multi_bodys() const
{
	return _request.multi_bodys;
}
/*
** Look up a header of one multipart body part.
**
** key     : header name; it is lowercased before the lookup.
** body_it : iterator on the part whose headers are searched.
**
** Returns the header value, or an empty string when the part has no such
** header. The value is returned by copy because the "not found" case has
** no stored string a reference could point to.
*/
const std::string Client::get_rq_multi_bodys_headers(const std::string & key, std::vector<MultipartBody>::const_iterator body_it) const
{
	const std::map<std::string, std::string>					&part_headers = body_it->headers;
	const std::map<std::string, std::string>::const_iterator	found = part_headers.find(::str_tolower(key));

	return (found != part_headers.end()) ? found->second : std::string();
}
/*
** Split the body of a "multipart/form-data" request into its parts and
** append them to _request.multi_bodys.
**
** Parsing roughly like described in :
** https://www.rfc-editor.org/rfc/rfc2046#section-5.1.1
**
** pos : offset in raw_request from which the first delimiter is searched.
**
** The boundary is read from the "boundary=" parameter of the Content-Type
** request header. Each part is the text between two delimiter lines
** ("--" + boundary); the CRLF that precedes a delimiter belongs to the
** delimiter, not to the part. Part headers are parsed with
** ::parse_http_headers() and their keys are lowercased.
**
** On malformed input, status is set to 400 and the function returns early;
** parts already parsed stay in _request.multi_bodys.
*/
void Client::_parse_multipart_body(size_t pos)
{
	std::string	boundary;
	std::string	delimiter;
	size_t		start_pos;
	size_t		end_pos;
	size_t		tmp_pos;
	size_t		ret;

	// Get boundary
	boundary = get_rq_headers("Content-Type");
	start_pos = boundary.find("boundary=");
	if (start_pos == NPOS)
	{
		status = 400; std::cerr << "_parse_multipart_body() error 1\n";
		return;
	}
	start_pos += sizeof("boundary=")-1;
	boundary = boundary.substr(start_pos);
	// Keep only the boundary value : drop following parameters, trailing
	// blanks and the optional surrounding quotes (';' is not a valid
	// boundary character, so cutting on it is safe).
	tmp_pos = boundary.find(';');
	if (tmp_pos != NPOS)
		boundary.erase(tmp_pos);
	while (!boundary.empty()
		&& (boundary[boundary.size()-1] == ' ' || boundary[boundary.size()-1] == '\t'))
		boundary.erase(boundary.size()-1);
	if (boundary.size() >= 2 && boundary[0] == '"' && boundary[boundary.size()-1] == '"')
		boundary = boundary.substr(1, boundary.size()-2);
	if (boundary.empty())
	{	// an empty boundary would make "--" match anywhere in the body
		status = 400; std::cerr << "_parse_multipart_body() error 1\n";
		return;
	}
	std::cerr << "boundary =|" << boundary << "|\n";

	// Search first delimiter (built once, reused for every search)
	delimiter = "--" + boundary;
	start_pos = raw_request.find(delimiter, pos);
	if (start_pos == NPOS)
	{
		status = 400; std::cerr << "_parse_multipart_body() error 2\n";
		return;
	}
	start_pos += delimiter.size() + CRLF_SIZE;

	while (1)
	{
		// One fresh object per part : reusing a single object would leak
		// the headers of a part into the following ones.
		MultipartBody	new_body;

		end_pos = raw_request.find(delimiter, start_pos);
		// The second test protects the length computation below against
		// an unsigned underflow when two delimiters are adjacent.
		if (end_pos == NPOS || end_pos < start_pos + CRLF_SIZE)
		{
			status = 400; std::cerr << "_parse_multipart_body() error 3\n";
			return;
		}
		new_body.body = raw_request.substr(start_pos, end_pos - start_pos - CRLF_SIZE);

		// Split headers from body
		tmp_pos = new_body.body.find(CRLF CRLF);
		if (tmp_pos != NPOS)
		{
			ret = ::parse_http_headers(new_body.body.substr(0, tmp_pos), new_body.headers);
			::str_map_key_tolower(new_body.headers);
			if (ret)
			{
				status = 400; std::cerr << "_parse_multipart_body() error 4\n";
				return;
			}
			tmp_pos += CRLF_SIZE*2;
			new_body.body.erase(0, tmp_pos);
		}
		else
		{	// No headers case : the part must start with the empty line
			if (new_body.body.compare(0, CRLF_SIZE, CRLF) != 0)
			{
				status = 400; std::cerr << "_parse_multipart_body() error 5\n";
				return;
			}
			// That empty line is a separator, not content
			new_body.body.erase(0, CRLF_SIZE);
		}

		_request.multi_bodys.push_back(new_body);

		// Move start for next loop
		start_pos = end_pos + delimiter.size();
		// A delimiter directly followed by "--" is the closing one ;
		// whatever follows it (CRLF, epilogue) is ignored.
		if (raw_request.compare(start_pos, 2, "--") == 0)
			break;
		// Skip the CRLF that ends the delimiter line, as done for the first one
		start_pos += CRLF_SIZE;
	}
}

View File

@@ -153,7 +153,7 @@ std::string http_methods_to_str(unsigned int methods)
file_type eval_file_type(const std::string &path)
{
const char *tmp_path = path.c_str();
const char *tmp_path = path.c_str(); // variable superflu ?
struct stat s;
if (stat(tmp_path, &s) != -1)
@@ -242,7 +242,7 @@ size_t
std::vector<std::string> list;
std::vector<std::string>::iterator it;
std::vector<std::string>::iterator it_end;
size_t err = 0;
size_t err_count = 0;
size_t pos;
std::string key;
std::string val;
@@ -255,13 +255,13 @@ size_t
pos = (*it).find(':');
if (pos == NPOS)
{
err++;
err_count++;
continue;
}
key = (*it).substr(0, pos);
if ( key.find(' ') != NPOS )
{
err++;
err_count++;
continue;
}
// bad idea, in cgi we need to have the original value
@@ -270,7 +270,7 @@ size_t
val = ::trim(val, ' ');
fields.insert( std::pair<std::string, std::string>(key, val) );
}
return err;
return err_count;
}
void str_map_key_tolower(std::map<std::string, std::string> & mp)

View File

@@ -6,6 +6,7 @@
# include <map>
# include <string>
# include <sstream>
# include <iostream>
# include <cstdlib> // strtol, strtoul
# include <climits> // LONG_MAX
# include <cerrno> // errno
@@ -73,4 +74,30 @@ void throw_test();
// debug
void print_special(std::string str);
/* Template */
/*
** Debug helper : print a pair on std::cout as "<first>: <second>" followed
** by a newline.
** The pair is taken by const reference so its members are not copied on
** every call.
*/
template <typename T1, typename T2 >
void print_pair(const std::pair<T1,T2>& p)
{
	std::cout << p.first << ": " << p.second << "\n";
}
/*
** Debug helper : dump a std::map on std::cout.
** Prints a banner, the number of entries, then every entry through
** print_pair() in the map's iteration order, and a closing marker.
*/
template <typename Key, typename T >
void print_map(const std::map<Key,T>& c)
{
	typedef typename std::map<Key,T>::const_iterator	entry_iterator;

	std::cout << " --print_map():\n" << "map.size() = " << c.size() << "\n";
	for (entry_iterator entry = c.begin(); entry != c.end(); ++entry)
		print_pair(*entry);
	std::cout << " --\n";
}
#endif

View File

@@ -97,7 +97,7 @@ class Webserv
void _autoindex(Client *client, const std::string &path);
// method_post.cpp
void _post(Client *client, const std::string &path);
void _post_file(Client *client, const std::string &path);
void _upload_files(Client *client);
// method_delete.cpp
void _delete(Client *client, const std::string &path);
void _delete_file(Client *client, const std::string &path);

View File

@@ -112,9 +112,11 @@ void Webserv::_init_http_status_map()
_http_status.insert(status_pair(405, S405));
_http_status.insert(status_pair(408, S408));
_http_status.insert(status_pair(413, S413));
_http_status.insert(status_pair(415, S415));
_http_status.insert(status_pair(500, S500));
_http_status.insert(status_pair(501, S501));
_http_status.insert(status_pair(505, S505));
}
void Webserv::_init_mime_types_map()

View File

@@ -8,51 +8,119 @@ void Webserv::_post(Client *client, const std::string &path)
WIP
https://www.rfc-editor.org/rfc/rfc9110.html#name-post
*/
_post_file(client, path);
(void)path;
std::cout << "_post()\n";
std::cerr << "upload_dir = " << client->assigned_location->upload_dir << "\n";
if (client->get_rq_abs_path() != client->assigned_location->path)
client->status = 404; // 404 ? J'ai un doute.
else if (client->assigned_location->upload_dir.empty())
client->status = 404; // 404 ? J'ai un doute.
else if (client->get_rq_multi_bodys().empty())
{
client->status = 415;
client->response.append("Accept: multipart/form-data"); // empty, no encoding accepted
client->response.append(CRLF);
}
else
_upload_file(client);
}
void Webserv::_post_file(Client *client, const std::string &path)
#define DEFAULT_NAME "unnamed_file"
// TODO : Loop for multi body
void Webserv::_upload_files(Client *client)
{
std::ofstream ofd;
std::vector<MultipartBody>::const_iterator body_it = client->get_rq_multi_bodys().begin();
std::string path;
std::string filename;
size_t pos;
bool file_existed;
if (access(path.c_str(), F_OK) == -1)
file_existed = false;
else
file_existed = true;
// How to determine status 403 for file that dont already exist ?
if (file_existed && access(path.c_str(), W_OK) == -1)
while (body_it != client->get_rq_multi_bodys().end())
{
std::perror("err access()");
client->status = 403;
return ;
}
if (body_it->body.empty())
{
++body_it;
continue;
}
// Content-Disposition: form-data; name="upload_file"; filename="camion.jpg"
::print_map(body_it->headers);
filename = client->get_rq_multi_bodys_headers("Content-Disposition", body_it);
std::cerr << "filename ="<< filename << "\n";
pos = filename.find("filename=");
if (pos != NPOS)
{
filename = filename.substr(pos + sizeof("filename=")-1);
std::cerr << "filename ="<< filename << "\n";
// A l'arrache pour enlever les "
filename.erase(0, 1);
std::cerr << "filename ="<< filename << "\n";
filename.erase(filename.size()-1, 1);
std::cerr << "filename ="<< filename << "\n";
std::cerr << "filename ="<< filename << "\n";
if (filename.empty())
filename = DEFAULT_NAME;
}
else
{
filename = DEFAULT_NAME;
}
std::cerr << "filename ="<< filename << "\n";
path = client->assigned_location->upload_dir; // Assume there a final '/'
path.append(filename);
ofd.open(path.c_str(), std::ios::trunc);
if (!ofd)
{
std::cerr << path << ": ofd.open fail" << '\n';
client->status = 500;
}
else
{
// Content-Length useless at this point ?
ofd << client->get_rq_body();
if (access(path.c_str(), F_OK) == -1)
file_existed = false;
else
file_existed = true;
// How to determine status 403 for file that dont already exist ? access() on the upload_dir ?
if (file_existed && access(path.c_str(), W_OK) == -1)
{
std::perror("err access()");
client->status = 403;
return ;
}
ofd.open(path.c_str(), std::ios::trunc);
if (!ofd)
{
std::cerr << path << ": ofd.open fail" << '\n';
client->status = 500;
return;
}
ofd << body_it->body;
if (!ofd)
{
std::cerr << path << ": ofd.write fail" << '\n';
client->status = 500;
return;
}
else if (file_existed)
{
client->status = 200;
// WIP https://www.rfc-editor.org/rfc/rfc9110.html#name-200-ok
}
else
{
client->status = 201;
// WIP https://www.rfc-editor.org/rfc/rfc9110.html#section-9.3.3-4
}
++body_it;
ofd.close();
}
if (file_existed) // with multi body it doesn't make much sense
{
// client->status = 200;
client->status = 204; // DEBUG 204
client->response.append("Location: ");
client->response.append("/index.html"); // WIP
client->response.append(CRLF);
client->response.append(CRLF);
// WIP https://www.rfc-editor.org/rfc/rfc9110.html#name-200-ok
}
else
{
// client->status = 201;
client->status = 204; // DEBUG 204
client->response.append("Location: ");
client->response.append("/index.html"); // WIP
client->response.append(CRLF);
client->response.append(CRLF);
// WIP https://www.rfc-editor.org/rfc/rfc9110.html#section-9.3.3-4
}
}

View File

@@ -25,7 +25,7 @@ void Webserv::_request(Client *client)
}
else if (ret == READ_COMPLETE)
{
if (client->body_complete)
if (client->body_complete && client->get_rq_multi_bodys().empty()) // DEBUG
std::cerr << "______BODY\n" << client->get_rq_body() << "\n______\n"; // DEBUG
_epoll_update(client->get_cl_fd(), EPOLLOUT, EPOLL_CTL_MOD);
client->request_complete = true;

View File

@@ -21,7 +21,10 @@ void Webserv::_response(Client *client)
}
else if (ret == SEND_COMPLETE)
{
if (client->get_rq_headers("Connection") == "close" || client->status == 408)
if (client->get_rq_headers("Connection") == "close"
|| client->status == 400 // TODO: Refactoring
|| client->status == 408
|| client->status == 413)
_close_client(client->get_cl_fd());
else
{
@@ -61,7 +64,10 @@ void Webserv::_append_base_headers(Client *client)
{
client->response.append("Server: Webserv/0.1" CRLF);
if (client->get_rq_headers("Connection") == "close")
if (client->get_rq_headers("Connection") == "close"
|| client->status == 400 // TODO: Refactoring
|| client->status == 408
|| client->status == 413)
client->response.append("Connection: close" CRLF);
else
client->response.append("Connection: keep-alive" CRLF);

View File

@@ -12,7 +12,12 @@ void Webserv::_timeout()
std::cerr << "timeout request fd " << it->get_cl_fd() << "\n";
it->status = 408;
_epoll_update(it->get_cl_fd(), EPOLLOUT, EPOLL_CTL_MOD);
// DEBUG, close without response 408
/* _close_client(it->get_cl_fd());
it = _clients.begin(); */
}
// else // DEBUG
++it;
}
}

6
test_file_upload.txt Normal file
View File

@@ -0,0 +1,6 @@
START
http://localhost:4040/test
http://localhost:4040/test/test_deeper/
http://localhost:4040/test/test_deeper/super_deep/
http://localhost:4040/test/index1.html
STOP

BIN
www/Cagneyc_intro.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.1 MiB

View File

@@ -5,6 +5,8 @@
https://www.rfc-editor.org/rfc/rfc2046#section-5.1.1
-->
<form action="/upload" method="post" enctype="multipart/form-data">
<input type="file" id="myFile" name="upload_file">
<input type="file" name="upload_file1">
<input type="file" name="upload_file2">
<input type="file" name="upload_file3">
<input type="submit">
</form>

View File

@@ -0,0 +1,10 @@
<!--
https://www.w3schools.com/howto/howto_html_file_upload_button.asp
https://www.filestack.com/fileschool/html/html-file-upload-tutorial-example/
https://www.rfc-editor.org/rfc/rfc9110#name-multipart-types
https://www.rfc-editor.org/rfc/rfc2046#section-5.1.1
-->
<form action="/upload" method="post" enctype="multipart/form-data">
<input type="file" name="upload_file1">
<input type="submit">
</form>