2020-05-05 09:47:40 +04:30
/*
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
*
2021-04-22 01:24:48 -07:00
* SPDX-License-Identifier: BSD-2-Clause
2020-05-05 09:47:40 +04:30
*/
2021-01-16 15:51:56 +01:00
# include <AK/Debug.h>
2021-03-03 23:54:07 +02:00
# include <LibCompress/Gzip.h>
2021-03-22 02:41:13 +00:00
# include <LibCompress/Zlib.h>
2021-05-12 04:26:25 +04:30
# include <LibCore/Event.h>
2020-05-05 09:47:40 +04:30
# include <LibCore/TCPSocket.h>
# include <LibHTTP/HttpResponse.h>
# include <LibHTTP/Job.h>
# include <stdio.h>
# include <unistd.h>
namespace HTTP {
// Undoes the HTTP content coding named by `content_encoding` on `buf`.
//
// "gzip" is decoded with Compress::GzipDecompressor. "deflate" is decoded with Compress::Zlib first
// and with Compress::DeflateDecompressor as a fallback. For any other coding, or when decompression
// fails, the input buffer is returned unchanged.
static ByteBuffer handle_content_encoding(const ByteBuffer& buf, const String& content_encoding)
{
    dbgln_if(JOB_DEBUG, "Job::handle_content_encoding: buf has content_encoding={}", content_encoding);

    // Content-coding names are case-insensitive tokens; also drop any whitespace that was left
    // around the header value so that " GZIP " is still recognised.
    auto encoding = content_encoding.trim_whitespace();

    if (encoding.equals_ignoring_case("gzip")) {
        // The check below is only used for logging: decompression is attempted either way and
        // a failure falls back to the original buffer.
        if (Compress::GzipDecompressor::is_likely_compressed(buf)) {
            dbgln_if(JOB_DEBUG, "Job::handle_content_encoding: buf is gzip compressed!");
        } else {
            dbgln("Job::handle_content_encoding: buf is not gzip compressed!");
        }

        auto uncompressed = Compress::GzipDecompressor::decompress_all(buf);
        if (!uncompressed.has_value()) {
            dbgln("Job::handle_content_encoding: Gzip::decompress() failed. Returning original buffer.");
            return buf;
        }

        if constexpr (JOB_DEBUG) {
            dbgln("Job::handle_content_encoding: Gzip::decompress() successful.");
            dbgln("  Input size: {}", buf.size());
            dbgln("  Output size: {}", uncompressed.value().size());
        }

        // Move the decoded bytes out instead of copying them.
        return uncompressed.release_value();
    }

    if (encoding.equals_ignoring_case("deflate")) {
        dbgln_if(JOB_DEBUG, "Job::handle_content_encoding: buf is deflate compressed!");

        // Even though the content encoding is "deflate", it's actually deflate with the zlib wrapper.
        // https://tools.ietf.org/html/rfc7230#section-4.2.2
        auto uncompressed = Compress::Zlib::decompress_all(buf);
        if (!uncompressed.has_value()) {
            // From the RFC:
            // "Note: Some non-conformant implementations send the "deflate"
            //        compressed data without the zlib wrapper."
            dbgln_if(JOB_DEBUG, "Job::handle_content_encoding: Zlib::decompress_all() failed. Trying DeflateDecompressor::decompress_all()");
            uncompressed = Compress::DeflateDecompressor::decompress_all(buf);
            if (!uncompressed.has_value()) {
                dbgln("Job::handle_content_encoding: DeflateDecompressor::decompress_all() failed, returning original buffer.");
                return buf;
            }
        }

        if constexpr (JOB_DEBUG) {
            dbgln("Job::handle_content_encoding: Deflate decompression successful.");
            dbgln("  Input size: {}", buf.size());
            dbgln("  Output size: {}", uncompressed.value().size());
        }

        return uncompressed.release_value();
    }

    // Unknown (or no) content coding: pass the data through untouched.
    return buf;
}
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 17:14:12 +03:30
// Creates a job for `request`. The output stream is handed to the Core::NetworkJob base and
// a copy of the request is kept in m_request.
Job::Job(const HttpRequest& request, OutputStream& output_stream)
    : Core::NetworkJob(output_stream)
    , m_request(request)
{
}
// Nothing to release by hand; members clean up after themselves.
Job::~Job() = default;
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 17:14:12 +03:30
void Job : : flush_received_buffers ( )
{
if ( ! m_can_stream_response | | m_buffered_size = = 0 )
return ;
2021-09-18 03:48:22 +04:30
dbgln_if ( JOB_DEBUG , " Job: Flushing received buffers: have {} bytes in {} buffers for {} " , m_buffered_size , m_received_buffers . size ( ) , m_request . url ( ) ) ;
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 17:14:12 +03:30
for ( size_t i = 0 ; i < m_received_buffers . size ( ) ; + + i ) {
auto & payload = m_received_buffers [ i ] ;
auto written = do_write ( payload ) ;
m_buffered_size - = written ;
if ( written = = payload . size ( ) ) {
// FIXME: Make this a take-first-friendly object?
m_received_buffers . take_first ( ) ;
- - i ;
continue ;
}
2021-02-23 20:42:32 +01:00
VERIFY ( written < payload . size ( ) ) ;
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 17:14:12 +03:30
payload = payload . slice ( written , payload . size ( ) - written ) ;
2021-01-16 15:51:56 +01:00
break ;
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 17:14:12 +03:30
}
2021-09-18 03:48:22 +04:30
dbgln_if ( JOB_DEBUG , " Job: Flushing received buffers done: have {} bytes in {} buffers for {} " , m_buffered_size , m_received_buffers . size ( ) , m_request . url ( ) ) ;
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 17:14:12 +03:30
}
2020-05-05 09:47:40 +04:30
void Job : : on_socket_connected ( )
{
register_on_ready_to_write ( [ & ] {
if ( m_sent_data )
return ;
m_sent_data = true ;
auto raw_request = m_request . to_raw_request ( ) ;
2021-01-16 15:51:56 +01:00
2021-01-23 23:59:27 +01:00
if constexpr ( JOB_DEBUG ) {
2021-01-16 15:51:56 +01:00
dbgln ( " Job: raw_request: " ) ;
dbgln ( " {} " , String : : copy ( raw_request ) ) ;
}
2020-05-05 09:47:40 +04:30
bool success = write ( raw_request ) ;
if ( ! success )
2021-08-30 18:12:48 +00:00
deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : TransmissionFailed ) ; } ) ;
2020-05-05 09:47:40 +04:30
} ) ;
register_on_ready_to_read ( [ & ] {
2021-09-18 03:48:22 +04:30
dbgln_if ( JOB_DEBUG , " Ready to read for {}, state = {}, cancelled = {} " , m_request . url ( ) , to_underlying ( m_state ) , is_cancelled ( ) ) ;
2020-05-05 09:47:40 +04:30
if ( is_cancelled ( ) )
return ;
2021-01-06 22:09:29 +03:30
if ( m_state = = State : : Finished ) {
2021-05-20 02:05:53 +04:30
// We have everything we want, at this point, we can either get an EOF, or a bunch of extra newlines
// (unless "Connection: close" isn't specified)
// So just ignore everything after this.
2021-01-06 22:09:29 +03:30
return ;
}
2020-05-05 09:47:40 +04:30
if ( m_state = = State : : InStatus ) {
2021-09-18 03:48:22 +04:30
if ( ! can_read_line ( ) ) {
dbgln_if ( JOB_DEBUG , " Job {} cannot read line " , m_request . url ( ) ) ;
2020-05-05 09:47:40 +04:30
return ;
2021-09-18 03:48:22 +04:30
}
2020-05-05 09:47:40 +04:30
auto line = read_line ( PAGE_SIZE ) ;
2021-09-18 03:48:22 +04:30
dbgln_if ( JOB_DEBUG , " Job {} read line of length {} " , m_request . url ( ) , line . length ( ) ) ;
2020-05-05 09:47:40 +04:30
if ( line . is_null ( ) ) {
2021-09-06 03:29:52 +04:30
dbgln ( " Job: Expected HTTP status " ) ;
2021-08-30 18:12:48 +00:00
return deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : TransmissionFailed ) ; } ) ;
2020-05-05 09:47:40 +04:30
}
2020-12-13 11:44:53 +01:00
auto parts = line . split_view ( ' ' ) ;
2020-05-05 09:47:40 +04:30
if ( parts . size ( ) < 3 ) {
2021-09-06 03:29:52 +04:30
dbgln ( " Job: Expected 3-part HTTP status, got '{}' " , line ) ;
2021-08-30 18:12:48 +00:00
return deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : ProtocolFailed ) ; } ) ;
2020-05-05 09:47:40 +04:30
}
2020-06-12 21:07:52 +02:00
auto code = parts [ 1 ] . to_uint ( ) ;
if ( ! code . has_value ( ) ) {
2021-09-06 03:29:52 +04:30
dbgln ( " Job: Expected numeric HTTP status " ) ;
2021-08-30 18:12:48 +00:00
return deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : ProtocolFailed ) ; } ) ;
2020-05-05 09:47:40 +04:30
}
2020-06-12 21:07:52 +02:00
m_code = code . value ( ) ;
2020-05-05 09:47:40 +04:30
m_state = State : : InHeaders ;
return ;
}
2020-08-18 20:34:15 -06:00
if ( m_state = = State : : InHeaders | | m_state = = State : : Trailers ) {
2020-05-05 09:47:40 +04:30
if ( ! can_read_line ( ) )
return ;
auto line = read_line ( PAGE_SIZE ) ;
if ( line . is_null ( ) ) {
2020-08-18 20:34:15 -06:00
if ( m_state = = State : : Trailers ) {
2020-07-26 06:38:33 +04:30
// Some servers like to send two ending chunks
// use this fact as an excuse to ignore anything after the last chunk
// that is not a valid trailing header.
return finish_up ( ) ;
}
2021-09-06 03:29:52 +04:30
dbgln ( " Job: Expected HTTP header " ) ;
2020-05-05 09:47:40 +04:30
return did_fail ( Core : : NetworkJob : : Error : : ProtocolFailed ) ;
}
2020-12-13 11:44:53 +01:00
if ( line . is_empty ( ) ) {
2020-08-18 20:34:15 -06:00
if ( m_state = = State : : Trailers ) {
2020-05-12 02:55:10 +04:30
return finish_up ( ) ;
} else {
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 17:14:12 +03:30
if ( on_headers_received )
on_headers_received ( m_headers , m_code > 0 ? m_code : Optional < u32 > { } ) ;
2020-05-12 02:55:10 +04:30
m_state = State : : InBody ;
}
2021-09-19 02:31:40 +04:30
// We've reached the end of the headers, there's a possibility that the server
// responds with nothing (content-length = 0 with normal encoding); if that's the case,
// quit early as we won't be reading anything anyway.
if ( auto result = m_headers . get ( " Content-Length " sv ) . value_or ( " " sv ) . to_uint ( ) ; result . has_value ( ) ) {
if ( result . value ( ) = = 0 & & ! m_headers . get ( " Transfer-Encoding " sv ) . value_or ( " " sv ) . view ( ) . trim_whitespace ( ) . equals_ignoring_case ( " chunked " sv ) )
return finish_up ( ) ;
}
2020-05-05 09:47:40 +04:30
return ;
}
2020-12-13 11:44:53 +01:00
auto parts = line . split_view ( ' : ' ) ;
2020-05-05 09:47:40 +04:30
if ( parts . is_empty ( ) ) {
2020-08-18 20:34:15 -06:00
if ( m_state = = State : : Trailers ) {
2020-07-26 06:38:33 +04:30
// Some servers like to send two ending chunks
// use this fact as an excuse to ignore anything after the last chunk
// that is not a valid trailing header.
return finish_up ( ) ;
}
2021-09-06 03:29:52 +04:30
dbgln ( " Job: Expected HTTP header with key/value " ) ;
2021-08-30 18:12:48 +00:00
return deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : ProtocolFailed ) ; } ) ;
2020-05-05 09:47:40 +04:30
}
auto name = parts [ 0 ] ;
2020-12-13 11:44:53 +01:00
if ( line . length ( ) < name . length ( ) + 2 ) {
2020-08-18 20:34:15 -06:00
if ( m_state = = State : : Trailers ) {
2020-07-26 06:38:33 +04:30
// Some servers like to send two ending chunks
// use this fact as an excuse to ignore anything after the last chunk
// that is not a valid trailing header.
return finish_up ( ) ;
}
2021-09-06 03:29:52 +04:30
dbgln ( " Job: Malformed HTTP header: '{}' ({}) " , line , line . length ( ) ) ;
2021-08-30 18:12:48 +00:00
return deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : ProtocolFailed ) ; } ) ;
2020-05-05 09:47:40 +04:30
}
2020-12-13 11:44:53 +01:00
auto value = line . substring ( name . length ( ) + 2 , line . length ( ) - name . length ( ) - 2 ) ;
2020-05-05 09:47:40 +04:30
m_headers . set ( name , value ) ;
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 17:14:12 +03:30
if ( name . equals_ignoring_case ( " Content-Encoding " ) ) {
// Assume that any content-encoding means that we can't decode it as a stream :(
2021-02-07 15:33:24 +03:30
dbgln_if ( JOB_DEBUG , " Content-Encoding {} detected, cannot stream output :( " , value ) ;
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 17:14:12 +03:30
m_can_stream_response = false ;
}
2021-02-07 15:33:24 +03:30
dbgln_if ( JOB_DEBUG , " Job: [{}] = '{}' " , name , value ) ;
2020-05-05 09:47:40 +04:30
return ;
}
2021-02-23 20:42:32 +01:00
VERIFY ( m_state = = State : : InBody ) ;
VERIFY ( can_read ( ) ) ;
2020-05-05 09:47:40 +04:30
read_while_data_available ( [ & ] {
AK: Rename KB, MB, GB to KiB, MiB, GiB
The SI prefixes "k", "M", "G" mean "10^3", "10^6", "10^9".
The IEC prefixes "Ki", "Mi", "Gi" mean "2^10", "2^20", "2^30".
Let's use the correct name, at least in code.
Only changes the name of the constants, no other behavior change.
2020-08-15 13:55:00 -04:00
auto read_size = 64 * KiB ;
2020-05-12 02:55:10 +04:30
if ( m_current_chunk_remaining_size . has_value ( ) ) {
read_chunk_size : ;
auto remaining = m_current_chunk_remaining_size . value ( ) ;
if ( remaining = = - 1 ) {
// read size
auto size_data = read_line ( PAGE_SIZE ) ;
2021-04-12 00:47:33 +04:30
if ( m_should_read_chunk_ending_line ) {
VERIFY ( size_data . is_empty ( ) ) ;
m_should_read_chunk_ending_line = false ;
return IterationDecision : : Continue ;
}
2020-12-13 11:44:53 +01:00
auto size_lines = size_data . view ( ) . lines ( ) ;
2021-02-07 15:33:24 +03:30
dbgln_if ( JOB_DEBUG , " Job: Received a chunk with size '{}' " , size_data ) ;
2020-05-12 02:55:10 +04:30
if ( size_lines . size ( ) = = 0 ) {
2021-01-09 18:51:44 +01:00
dbgln ( " Job: Reached end of stream " ) ;
2020-08-18 20:34:15 -06:00
finish_up ( ) ;
2020-05-12 02:55:10 +04:30
return IterationDecision : : Break ;
} else {
2020-05-21 00:36:54 +04:30
auto chunk = size_lines [ 0 ] . split_view ( ' ; ' , true ) ;
String size_string = chunk [ 0 ] ;
char * endptr ;
auto size = strtoul ( size_string . characters ( ) , & endptr , 16 ) ;
if ( * endptr ) {
// invalid number
2021-08-30 18:12:48 +00:00
deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : TransmissionFailed ) ; } ) ;
2020-05-21 00:36:54 +04:30
return IterationDecision : : Break ;
}
if ( size = = 0 ) {
2020-05-12 02:55:10 +04:30
// This is the last chunk
// '0' *[; chunk-ext-name = chunk-ext-value]
// We're going to ignore _all_ chunk extensions
read_size = 0 ;
m_current_chunk_total_size = 0 ;
m_current_chunk_remaining_size = 0 ;
2021-01-16 15:51:56 +01:00
2021-02-07 15:33:24 +03:30
dbgln_if ( JOB_DEBUG , " Job: Received the last chunk with extensions '{}' " , size_string . substring_view ( 1 , size_string . length ( ) - 1 ) ) ;
2020-05-12 02:55:10 +04:30
} else {
m_current_chunk_total_size = size ;
m_current_chunk_remaining_size = size ;
read_size = size ;
2021-01-16 15:51:56 +01:00
2021-02-07 15:33:24 +03:30
dbgln_if ( JOB_DEBUG , " Job: Chunk of size '{}' started " , size ) ;
2020-05-12 02:55:10 +04:30
}
}
} else {
read_size = remaining ;
2021-01-16 15:51:56 +01:00
2021-02-07 15:33:24 +03:30
dbgln_if ( JOB_DEBUG , " Job: Resuming chunk with '{}' bytes left over " , remaining ) ;
2020-05-12 02:55:10 +04:30
}
} else {
auto transfer_encoding = m_headers . get ( " Transfer-Encoding " ) ;
if ( transfer_encoding . has_value ( ) ) {
2021-04-14 09:20:25 +04:30
// Note: Some servers add extra spaces around 'chunked', see #6302.
auto encoding = transfer_encoding . value ( ) . trim_whitespace ( ) ;
2021-01-16 15:51:56 +01:00
2021-02-07 15:33:24 +03:30
dbgln_if ( JOB_DEBUG , " Job: This content has transfer encoding '{}' " , encoding ) ;
2020-05-12 02:55:10 +04:30
if ( encoding . equals_ignoring_case ( " chunked " ) ) {
m_current_chunk_remaining_size = - 1 ;
goto read_chunk_size ;
} else {
2021-01-16 15:51:56 +01:00
dbgln ( " Job: Unknown transfer encoding '{}', the result will likely be wrong! " , encoding ) ;
2020-05-12 02:55:10 +04:30
}
}
}
2021-09-18 03:48:22 +04:30
dbgln_if ( JOB_DEBUG , " Waiting for payload for {} " , m_request . url ( ) ) ;
2020-05-12 02:55:10 +04:30
auto payload = receive ( read_size ) ;
2021-09-18 03:48:22 +04:30
dbgln_if ( JOB_DEBUG , " Received {} bytes of payload from {} " , payload . size ( ) , m_request . url ( ) ) ;
2021-05-16 08:47:46 +02:00
if ( payload . is_empty ( ) ) {
2020-05-05 09:47:40 +04:30
if ( eof ( ) ) {
finish_up ( ) ;
return IterationDecision : : Break ;
}
if ( should_fail_on_empty_payload ( ) ) {
2021-08-30 18:12:48 +00:00
deferred_invoke ( [ this ] { did_fail ( Core : : NetworkJob : : Error : : ProtocolFailed ) ; } ) ;
2020-05-05 09:47:40 +04:30
return IterationDecision : : Break ;
}
}
2020-05-12 02:55:10 +04:30
2020-05-05 09:47:40 +04:30
m_received_buffers . append ( payload ) ;
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 17:14:12 +03:30
m_buffered_size + = payload . size ( ) ;
2020-05-05 09:47:40 +04:30
m_received_size + = payload . size ( ) ;
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 17:14:12 +03:30
flush_received_buffers ( ) ;
2020-05-05 09:47:40 +04:30
2020-05-12 02:55:10 +04:30
if ( m_current_chunk_remaining_size . has_value ( ) ) {
auto size = m_current_chunk_remaining_size . value ( ) - payload . size ( ) ;
2021-01-16 15:51:56 +01:00
2021-02-07 15:33:24 +03:30
dbgln_if ( JOB_DEBUG , " Job: We have {} bytes left over in this chunk " , size ) ;
2020-05-12 02:55:10 +04:30
if ( size = = 0 ) {
2021-02-07 15:33:24 +03:30
dbgln_if ( JOB_DEBUG , " Job: Finished a chunk of {} bytes " , m_current_chunk_total_size . value ( ) ) ;
2020-08-18 20:34:15 -06:00
if ( m_current_chunk_total_size . value ( ) = = 0 ) {
m_state = State : : Trailers ;
return IterationDecision : : Break ;
}
2020-05-12 02:55:10 +04:30
// we've read everything, now let's get the next chunk
size = - 1 ;
2021-04-12 00:47:33 +04:30
if ( can_read_line ( ) ) {
auto line = read_line ( PAGE_SIZE ) ;
VERIFY ( line . is_empty ( ) ) ;
} else {
m_should_read_chunk_ending_line = true ;
}
2020-05-12 02:55:10 +04:30
}
m_current_chunk_remaining_size = size ;
}
2020-05-05 09:47:40 +04:30
auto content_length_header = m_headers . get ( " Content-Length " ) ;
Optional < u32 > content_length { } ;
if ( content_length_header . has_value ( ) ) {
2020-06-12 21:07:52 +02:00
auto length = content_length_header . value ( ) . to_uint ( ) ;
if ( length . has_value ( ) )
content_length = length . value ( ) ;
2020-05-05 09:47:40 +04:30
}
2021-08-30 18:12:48 +00:00
deferred_invoke ( [ this , content_length ] { did_progress ( content_length , m_received_size ) ; } ) ;
2020-05-05 09:47:40 +04:30
if ( content_length . has_value ( ) ) {
auto length = content_length . value ( ) ;
if ( m_received_size > = length ) {
m_received_size = length ;
finish_up ( ) ;
return IterationDecision : : Break ;
}
}
return IterationDecision : : Continue ;
} ) ;
if ( ! is_established ( ) ) {
2021-05-01 21:10:08 +02:00
dbgln_if ( JOB_DEBUG , " Connection appears to have closed, finishing up " ) ;
2020-05-05 09:47:40 +04:30
finish_up ( ) ;
}
} ) ;
}
2021-05-12 04:26:25 +04:30
void Job : : timer_event ( Core : : TimerEvent & event )
{
event . accept ( ) ;
finish_up ( ) ;
if ( m_buffered_size = = 0 )
stop_timer ( ) ;
}
2020-05-05 09:47:40 +04:30
void Job : : finish_up ( )
{
2021-06-29 01:40:18 +04:30
VERIFY ( ! m_has_scheduled_finish ) ;
2020-05-05 09:47:40 +04:30
m_state = State : : Finished ;
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 17:14:12 +03:30
if ( ! m_can_stream_response ) {
2021-09-06 03:29:52 +04:30
auto flattened_buffer = ByteBuffer : : create_uninitialized ( m_received_size ) . release_value ( ) ; // FIXME: Handle possible OOM situation.
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 17:14:12 +03:30
u8 * flat_ptr = flattened_buffer . data ( ) ;
for ( auto & received_buffer : m_received_buffers ) {
memcpy ( flat_ptr , received_buffer . data ( ) , received_buffer . size ( ) ) ;
flat_ptr + = received_buffer . size ( ) ;
}
m_received_buffers . clear ( ) ;
// For the time being, we cannot stream stuff with content-encoding set to _anything_.
2021-03-03 23:54:07 +02:00
// FIXME: LibCompress exposes a streaming interface, so this can be resolved
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 17:14:12 +03:30
auto content_encoding = m_headers . get ( " Content-Encoding " ) ;
if ( content_encoding . has_value ( ) ) {
flattened_buffer = handle_content_encoding ( flattened_buffer , content_encoding . value ( ) ) ;
}
m_buffered_size = flattened_buffer . size ( ) ;
m_received_buffers . append ( move ( flattened_buffer ) ) ;
m_can_stream_response = true ;
2020-05-05 09:47:40 +04:30
}
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 17:14:12 +03:30
flush_received_buffers ( ) ;
if ( m_buffered_size ! = 0 ) {
2020-12-31 01:42:44 +03:30
// We have to wait for the client to consume all the downloaded data
// before we can actually call `did_finish`. in a normal flow, this should
// never be hit since the client is reading as we are writing, unless there
// are too many concurrent downloads going on.
2021-05-12 04:26:25 +04:30
dbgln_if ( JOB_DEBUG , " Flush finished with {} bytes remaining, will try again later " , m_buffered_size ) ;
if ( ! has_timer ( ) )
start_timer ( 50 ) ;
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 17:14:12 +03:30
return ;
2020-05-05 09:47:40 +04:30
}
2021-06-29 01:40:18 +04:30
m_has_scheduled_finish = true ;
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 17:14:12 +03:30
auto response = HttpResponse : : create ( m_code , move ( m_headers ) ) ;
2021-08-30 18:12:48 +00:00
deferred_invoke ( [ this , response = move ( response ) ] {
2021-06-29 01:40:18 +04:30
did_finish ( response ) ;
2020-05-05 09:47:40 +04:30
} ) ;
}
}