2020-01-18 09:38:21 +01:00
/*
 * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
2020-04-26 22:52:30 +02:00
# include <AK/Base64.h>
2020-05-26 15:44:57 +04:30
# include <AK/JsonObject.h>
2020-01-01 18:53:34 +01:00
# include <AK/SharedBuffer.h>
2020-04-03 22:58:05 +02:00
# include <LibCore/EventLoop.h>
2020-02-06 15:04:03 +01:00
# include <LibCore/File.h>
2019-11-24 14:24:09 +01:00
# include <LibProtocol/Client.h>
# include <LibProtocol/Download.h>
2020-06-01 21:58:29 +02:00
# include <LibWeb/Loader/LoadRequest.h>
2020-06-01 21:33:23 +02:00
# include <LibWeb/Loader/Resource.h>
2020-06-01 20:42:50 +02:00
# include <LibWeb/Loader/ResourceLoader.h>
2019-10-08 19:37:15 +02:00
2020-06-13 15:27:53 +02:00
//#define CACHE_DEBUG
2020-03-07 10:27:02 +01:00
namespace Web {
2019-10-08 19:37:15 +02:00
// Returns the shared ResourceLoader, constructing it the first time it is asked for.
// The reference obtained from construct() is handed to leak_ref(), so this function
// keeps only a raw pointer to the instance.
ResourceLoader& ResourceLoader::the()
{
    static ResourceLoader* s_instance;
    if (s_instance)
        return *s_instance;

    s_instance = &ResourceLoader::construct().leak_ref();
    return *s_instance;
}
2019-11-24 14:24:09 +01:00
// Sets up the loader with a freshly constructed Protocol::Client and the default
// user agent string (load() sends m_user_agent as the User-Agent request header).
ResourceLoader::ResourceLoader()
    : m_protocol_client(Protocol::Client::construct())
    , m_user_agent(" Mozilla/4.0 (SerenityOS; x86) LibWeb+LibJS (Not KHTML, nor Gecko) LibWeb ")
{
}
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 17:14:12 +03:30
void ResourceLoader : : load_sync ( const URL & url , Function < void ( ReadonlyBytes , const HashMap < String , String , CaseInsensitiveStringTraits > & response_headers ) > success_callback , Function < void ( const String & ) > error_callback )
2020-04-03 22:58:05 +02:00
{
Core : : EventLoop loop ;
load (
url ,
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 17:14:12 +03:30
[ & ] ( auto data , auto & response_headers ) {
2020-05-03 22:20:49 +02:00
success_callback ( data , response_headers ) ;
2020-04-03 22:58:05 +02:00
loop . quit ( 0 ) ;
} ,
[ & ] ( auto & string ) {
2020-04-04 14:17:39 +02:00
if ( error_callback )
error_callback ( string ) ;
2020-04-03 22:58:05 +02:00
loop . quit ( 0 ) ;
} ) ;
loop . exec ( ) ;
}
2020-06-01 21:58:29 +02:00
static HashMap < LoadRequest , NonnullRefPtr < Resource > > s_resource_cache ;
2020-06-02 20:27:26 +02:00
RefPtr < Resource > ResourceLoader : : load_resource ( Resource : : Type type , const LoadRequest & request )
2020-06-01 21:33:23 +02:00
{
2020-06-01 21:58:29 +02:00
if ( ! request . is_valid ( ) )
2020-06-01 21:33:23 +02:00
return nullptr ;
2020-06-01 21:58:29 +02:00
auto it = s_resource_cache . find ( request ) ;
if ( it ! = s_resource_cache . end ( ) ) {
2020-06-05 23:35:08 +02:00
if ( it - > value - > type ( ) ! = type ) {
dbg ( ) < < " FIXME: Not using cached resource for " < < request . url ( ) < < " since there's a type mismatch. " ;
} else {
2020-06-13 15:27:53 +02:00
# ifdef CACHE_DEBUG
2020-06-05 23:35:08 +02:00
dbg ( ) < < " Reusing cached resource for: " < < request . url ( ) ;
2020-06-13 15:27:53 +02:00
# endif
2020-06-05 23:35:08 +02:00
return it - > value ;
}
2020-06-01 21:58:29 +02:00
}
2020-06-02 20:27:26 +02:00
auto resource = Resource : : create ( { } , type , request ) ;
2020-06-01 21:58:29 +02:00
s_resource_cache . set ( request , resource ) ;
2020-06-01 21:33:23 +02:00
load (
2020-09-28 11:55:26 +02:00
request ,
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 17:14:12 +03:30
[ = ] ( auto data , auto & headers ) {
2020-06-01 21:33:23 +02:00
const_cast < Resource & > ( * resource ) . did_load ( { } , data , headers ) ;
} ,
[ = ] ( auto & error ) {
const_cast < Resource & > ( * resource ) . did_fail ( { } , error ) ;
} ) ;
return resource ;
}
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 17:14:12 +03:30
void ResourceLoader : : load ( const LoadRequest & request , Function < void ( ReadonlyBytes , const HashMap < String , String , CaseInsensitiveStringTraits > & response_headers ) > success_callback , Function < void ( const String & ) > error_callback )
2019-10-08 19:37:15 +02:00
{
2020-09-28 11:55:26 +02:00
auto & url = request . url ( ) ;
2020-04-12 04:01:34 +00:00
if ( is_port_blocked ( url . port ( ) ) ) {
2020-04-26 22:08:03 +02:00
dbg ( ) < < " ResourceLoader::load: Error: blocked port " < < url . port ( ) < < " for URL: " < < url ;
return ;
2020-04-12 04:01:34 +00:00
}
2020-05-10 11:13:36 +02:00
if ( url . protocol ( ) = = " about " ) {
dbg ( ) < < " Loading about: URL " < < url ;
deferred_invoke ( [ success_callback = move ( success_callback ) ] ( auto & ) {
2020-12-19 17:39:59 +01:00
success_callback ( String : : empty ( ) . to_byte_buffer ( ) , { } ) ;
2020-05-10 11:13:36 +02:00
} ) ;
return ;
}
2020-04-26 22:52:30 +02:00
if ( url . protocol ( ) = = " data " ) {
dbg ( ) < < " ResourceLoader loading a data URL with mime-type: ' " < < url . data_mime_type ( ) < < " ', base64= " < < url . data_payload_is_base64 ( ) < < " , payload=' " < < url . data_payload ( ) < < " ' " ;
ByteBuffer data ;
if ( url . data_payload_is_base64 ( ) )
data = decode_base64 ( url . data_payload ( ) ) ;
else
data = url . data_payload ( ) . to_byte_buffer ( ) ;
deferred_invoke ( [ data = move ( data ) , success_callback = move ( success_callback ) ] ( auto & ) {
2020-05-03 22:20:49 +02:00
success_callback ( data , { } ) ;
2020-04-26 22:52:30 +02:00
} ) ;
return ;
}
2019-10-08 19:37:15 +02:00
if ( url . protocol ( ) = = " file " ) {
2020-02-02 12:34:39 +01:00
auto f = Core : : File : : construct ( ) ;
2019-10-08 19:37:15 +02:00
f - > set_filename ( url . path ( ) ) ;
2020-02-02 12:34:39 +01:00
if ( ! f - > open ( Core : : IODevice : : OpenMode : : ReadOnly ) ) {
2019-10-08 19:40:48 +02:00
dbg ( ) < < " ResourceLoader::load: Error: " < < f - > error_string ( ) ;
2020-03-31 23:59:11 +01:00
if ( error_callback )
error_callback ( f - > error_string ( ) ) ;
2019-10-08 19:37:15 +02:00
return ;
}
auto data = f - > read_all ( ) ;
2020-03-31 23:59:11 +01:00
deferred_invoke ( [ data = move ( data ) , success_callback = move ( success_callback ) ] ( auto & ) {
2020-05-03 22:20:49 +02:00
success_callback ( data , { } ) ;
2019-10-08 19:40:48 +02:00
} ) ;
2019-10-08 19:37:15 +02:00
return ;
}
2020-05-14 18:36:47 +10:00
if ( url . protocol ( ) = = " http " | | url . protocol ( ) = = " https " | | url . protocol ( ) = = " gemini " ) {
2020-05-21 12:58:57 +02:00
HashMap < String , String > headers ;
headers . set ( " User-Agent " , m_user_agent ) ;
2020-11-11 07:46:25 +00:00
headers . set ( " Accept-Encoding " , " gzip " ) ;
2020-09-28 11:55:26 +02:00
for ( auto & it : request . headers ( ) ) {
headers . set ( it . key , it . value ) ;
}
auto download = protocol_client ( ) . start_download ( request . method ( ) , url . to_string ( ) , headers , request . body ( ) ) ;
2020-04-04 20:00:07 +02:00
if ( ! download ) {
if ( error_callback )
error_callback ( " Failed to initiate load " ) ;
return ;
}
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 17:14:12 +03:30
download - > on_buffered_download_finish = [ this , success_callback = move ( success_callback ) , error_callback = move ( error_callback ) , download ] ( bool success , auto , auto & response_headers , auto status_code , ReadonlyBytes payload ) {
if ( status_code . has_value ( ) & & status_code . value ( ) > = 400 & & status_code . value ( ) < = 499 ) {
if ( error_callback )
2021-01-03 14:05:46 +01:00
error_callback ( String : : formatted ( " HTTP error ({}) " , status_code . value ( ) ) ) ;
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 17:14:12 +03:30
return ;
}
2019-11-25 11:47:25 +01:00
- - m_pending_loads ;
if ( on_load_counter_change )
on_load_counter_change ( ) ;
2019-10-08 19:37:15 +02:00
if ( ! success ) {
2020-03-31 23:59:11 +01:00
if ( error_callback )
error_callback ( " HTTP load failed " ) ;
2019-11-30 11:58:47 +01:00
return ;
2019-10-08 19:37:15 +02:00
}
2020-12-31 11:16:30 -07:00
deferred_invoke ( [ download ] ( auto & ) {
// Clear circular reference of `download` captured by copy
const_cast < Protocol : : Download & > ( * download ) . on_buffered_download_finish = nullptr ;
} ) ;
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 17:14:12 +03:30
success_callback ( payload , response_headers ) ;
2019-10-08 19:37:15 +02:00
} ;
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 17:14:12 +03:30
download - > set_should_buffer_all_input ( true ) ;
2020-08-02 05:27:42 +04:30
download - > on_certificate_requested = [ ] ( ) - > Protocol : : Download : : CertificateAndKey {
return { } ;
} ;
2019-11-24 14:24:09 +01:00
+ + m_pending_loads ;
if ( on_load_counter_change )
on_load_counter_change ( ) ;
2019-10-08 19:37:15 +02:00
return ;
}
2020-03-31 23:59:11 +01:00
if ( error_callback )
2021-01-03 14:05:46 +01:00
error_callback ( String : : formatted ( " Protocol not implemented: {} " , url . protocol ( ) ) ) ;
2019-10-08 19:37:15 +02:00
}
2020-03-07 10:27:02 +01:00
ProtocolServer: Stream the downloaded data if possible
This patchset makes ProtocolServer stream the downloads to its client
(LibProtocol), and as such changes the download API; a possible
download lifecycle could be as such:
notation = client->server:'>', server->client:'<', pipe activity:'*'
```
> StartDownload(GET, url, headers, {})
< Response(0, fd 8)
* {data, 1024b}
< HeadersBecameAvailable(0, response_headers, 200)
< DownloadProgress(0, 4K, 1024)
* {data, 1024b}
* {data, 1024b}
< DownloadProgress(0, 4K, 2048)
* {data, 1024b}
< DownloadProgress(0, 4K, 1024)
< DownloadFinished(0, true, 4K)
```
Since managing the received file descriptor is a pain, LibProtocol
implements `Download::stream_into(OutputStream)`, which can be used to
stream the download into any given output stream (be it a file, or
memory, or writing stuff with a delay, etc.).
Also, as some of the users of this API require all the downloaded data
upfront, LibProtocol also implements `set_should_buffer_all_input()`,
which causes the download instance to buffer all the data until the
download is complete, and to call the `on_buffered_download_finish`
hook.
2020-12-26 17:14:12 +03:30
void ResourceLoader : : load ( const URL & url , Function < void ( ReadonlyBytes , const HashMap < String , String , CaseInsensitiveStringTraits > & response_headers ) > success_callback , Function < void ( const String & ) > error_callback )
2020-09-28 11:55:26 +02:00
{
LoadRequest request ;
request . set_url ( url ) ;
load ( request , move ( success_callback ) , move ( error_callback ) ) ;
}
2020-04-26 22:08:03 +02:00
bool ResourceLoader : : is_port_blocked ( int port )
{
2020-04-12 04:01:34 +00:00
int ports [ ] { 1 , 7 , 9 , 11 , 13 , 15 , 17 , 19 , 20 , 21 , 22 , 23 , 25 , 37 , 42 ,
43 , 53 , 77 , 79 , 87 , 95 , 101 , 102 , 103 , 104 , 109 , 110 , 111 , 113 ,
115 , 117 , 119 , 123 , 135 , 139 , 143 , 179 , 389 , 465 , 512 , 513 , 514 ,
515 , 526 , 530 , 531 , 532 , 540 , 556 , 563 , 587 , 601 , 636 , 993 , 995 ,
2049 , 3659 , 4045 , 6000 , 6379 , 6665 , 6666 , 6667 , 6668 , 6669 , 9000 } ;
for ( auto blocked_port : ports )
if ( port = = blocked_port )
return true ;
return false ;
}
2020-03-07 10:27:02 +01:00
}