2023-04-06 15:50:22 +03:00
/*
* Copyright ( c ) 2020 , Andreas Kling < kling @ serenityos . org >
* Copyright ( c ) 2023 , Aliaksandr Kalenik < kalenik . aliaksandr @ gmail . com >
*
* SPDX - License - Identifier : BSD - 2 - Clause
*/
# include <AK/Debug.h>
# include <AK/LexicalPath.h>
# include <LibGemini/Document.h>
# include <LibGfx/ImageFormats/ImageDecoder.h>
# include <LibMarkdown/Document.h>
# include <LibTextCodec/Decoder.h>
# include <LibWeb/DOM/Document.h>
# include <LibWeb/DOM/DocumentLoading.h>
2023-04-06 18:10:12 +03:00
# include <LibWeb/HTML/Navigable.h>
2023-04-06 15:50:22 +03:00
# include <LibWeb/HTML/NavigationParams.h>
# include <LibWeb/HTML/Parser/HTMLEncodingDetection.h>
# include <LibWeb/HTML/Parser/HTMLParser.h>
2023-11-17 15:10:07 +02:00
# include <LibWeb/Loader/GeneratedPagesLoader.h>
2023-04-06 15:50:22 +03:00
# include <LibWeb/Namespace.h>
# include <LibWeb/Platform/ImageCodecPlugin.h>
# include <LibWeb/XML/XMLDocumentBuilder.h>
namespace Web {
// Populates `document` from Markdown source.
//
// `data` is handed to Markdown::Document::parse(); if that yields a null result
// this function returns false without touching `document`. Otherwise the
// Markdown is rendered to HTML (with the extra <head> contents defined below)
// and fed through the HTML parser, and the function returns true.
static bool build_markdown_document ( DOM : : Document & document , ByteBuffer const & data )
{
auto markdown_document = Markdown : : Document : : parse ( data ) ;
if ( ! markdown_document )
return false ;
// Extra markup passed to render_to_html(): a stylesheet plus a script that
// tags every <img> whose naturalWidth exceeds window.innerWidth with the
// "zoomable" class and toggles "zoomed-in" on click. The script re-evaluates
// the images on the document "load" event and on window "resize".
auto extra_head_contents = R " ~~~(
< style >
. zoomable {
cursor : zoom - in ;
max - width : 100 % ;
}
. zoomable . zoomed - in {
cursor : zoom - out ;
max - width : none ;
}
< / style >
< script >
function imageClickEventListener ( event ) {
let image = event . target ;
if ( image . classList . contains ( " zoomable " ) ) {
image . classList . toggle ( " zoomed-in " ) ;
}
}
function processImages ( ) {
let images = document . querySelectorAll ( " img " ) ;
let windowWidth = window . innerWidth ;
images . forEach ( ( image ) = > {
if ( image . naturalWidth > windowWidth ) {
image . classList . add ( " zoomable " ) ;
} else {
image . classList . remove ( " zoomable " ) ;
image . classList . remove ( " zoomed-in " ) ;
}
image . addEventListener ( " click " , imageClickEventListener ) ;
} ) ;
}
document . addEventListener ( " load " , ( ) = > {
processImages ( ) ;
} ) ;
window . addEventListener ( " resize " , ( ) = > {
processImages ( ) ;
} ) ;
< / script >
) ~ ~ ~ " sv;
// The rendered HTML is produced in-process, so the encoding is passed to the
// parser explicitly instead of being sniffed.
auto parser = HTML : : HTMLParser : : create ( document , markdown_document - > render_to_html ( extra_head_contents ) , " utf-8 " ) ;
// Run the parser using the document's own URL.
parser - > run ( document . url ( ) ) ;
return true ;
}
static bool build_text_document ( DOM : : Document & document , ByteBuffer const & data )
{
2023-11-04 18:42:04 +01:00
auto html_element = DOM : : create_element ( document , HTML : : TagNames : : html , Namespace : : HTML ) . release_value_but_fixme_should_propagate_errors ( ) ;
2023-04-06 15:50:22 +03:00
MUST ( document . append_child ( html_element ) ) ;
2023-11-04 18:42:04 +01:00
auto head_element = DOM : : create_element ( document , HTML : : TagNames : : head , Namespace : : HTML ) . release_value_but_fixme_should_propagate_errors ( ) ;
2023-04-06 15:50:22 +03:00
MUST ( html_element - > append_child ( head_element ) ) ;
2023-11-04 18:42:04 +01:00
auto title_element = DOM : : create_element ( document , HTML : : TagNames : : title , Namespace : : HTML ) . release_value_but_fixme_should_propagate_errors ( ) ;
2023-04-06 15:50:22 +03:00
MUST ( head_element - > append_child ( title_element ) ) ;
2023-09-15 21:46:58 +12:00
auto title_text = document . create_text_node ( MUST ( String : : from_deprecated_string ( document . url ( ) . basename ( ) ) ) ) ;
2023-04-06 15:50:22 +03:00
MUST ( title_element - > append_child ( title_text ) ) ;
2023-11-04 18:42:04 +01:00
auto body_element = DOM : : create_element ( document , HTML : : TagNames : : body , Namespace : : HTML ) . release_value_but_fixme_should_propagate_errors ( ) ;
2023-04-06 15:50:22 +03:00
MUST ( html_element - > append_child ( body_element ) ) ;
2023-11-04 18:42:04 +01:00
auto pre_element = DOM : : create_element ( document , HTML : : TagNames : : pre , Namespace : : HTML ) . release_value_but_fixme_should_propagate_errors ( ) ;
2023-04-06 15:50:22 +03:00
MUST ( body_element - > append_child ( pre_element ) ) ;
2023-09-15 21:46:58 +12:00
MUST ( pre_element - > append_child ( document . create_text_node ( String : : from_utf8 ( StringView { data } ) . release_value_but_fixme_should_propagate_errors ( ) ) ) ) ;
2023-04-06 15:50:22 +03:00
return true ;
}
static bool build_image_document ( DOM : : Document & document , ByteBuffer const & data )
{
auto image = Platform : : ImageCodecPlugin : : the ( ) . decode_image ( data ) ;
if ( ! image . has_value ( ) | | image - > frames . is_empty ( ) )
return false ;
auto const & frame = image - > frames [ 0 ] ;
auto const & bitmap = frame . bitmap ;
if ( ! bitmap )
return false ;
2023-11-04 18:42:04 +01:00
auto html_element = DOM : : create_element ( document , HTML : : TagNames : : html , Namespace : : HTML ) . release_value_but_fixme_should_propagate_errors ( ) ;
2023-04-06 15:50:22 +03:00
MUST ( document . append_child ( html_element ) ) ;
2023-11-04 18:42:04 +01:00
auto head_element = DOM : : create_element ( document , HTML : : TagNames : : head , Namespace : : HTML ) . release_value_but_fixme_should_propagate_errors ( ) ;
2023-04-06 15:50:22 +03:00
MUST ( html_element - > append_child ( head_element ) ) ;
2023-11-04 18:42:04 +01:00
auto title_element = DOM : : create_element ( document , HTML : : TagNames : : title , Namespace : : HTML ) . release_value_but_fixme_should_propagate_errors ( ) ;
2023-04-06 15:50:22 +03:00
MUST ( head_element - > append_child ( title_element ) ) ;
auto basename = LexicalPath : : basename ( document . url ( ) . serialize_path ( ) ) ;
2023-09-06 15:17:20 +12:00
auto title_text = document . heap ( ) . allocate < DOM : : Text > ( document . realm ( ) , document , MUST ( String : : formatted ( " {} [{}x{}] " , basename , bitmap - > width ( ) , bitmap - > height ( ) ) ) ) ;
2023-04-06 15:50:22 +03:00
MUST ( title_element - > append_child ( * title_text ) ) ;
2023-11-04 18:42:04 +01:00
auto body_element = DOM : : create_element ( document , HTML : : TagNames : : body , Namespace : : HTML ) . release_value_but_fixme_should_propagate_errors ( ) ;
2023-04-06 15:50:22 +03:00
MUST ( html_element - > append_child ( body_element ) ) ;
2023-11-04 18:42:04 +01:00
auto image_element = DOM : : create_element ( document , HTML : : TagNames : : img , Namespace : : HTML ) . release_value_but_fixme_should_propagate_errors ( ) ;
2023-10-08 11:42:00 +13:00
MUST ( image_element - > set_attribute ( HTML : : AttributeNames : : src , MUST ( document . url ( ) . to_string ( ) ) ) ) ;
2023-04-06 15:50:22 +03:00
MUST ( body_element - > append_child ( image_element ) ) ;
return true ;
}
static bool build_gemini_document ( DOM : : Document & document , ByteBuffer const & data )
{
StringView gemini_data { data } ;
auto gemini_document = Gemini : : Document : : parse ( gemini_data , document . url ( ) ) ;
DeprecatedString html_data = gemini_document - > render_to_html ( ) ;
dbgln_if ( GEMINI_DEBUG , " Gemini data: \n \" \" \" {} \" \" \" " , gemini_data ) ;
dbgln_if ( GEMINI_DEBUG , " Converted to HTML: \n \" \" \" {} \" \" \" " , html_data ) ;
auto parser = HTML : : HTMLParser : : create ( document , html_data , " utf-8 " ) ;
parser - > run ( document . url ( ) ) ;
return true ;
}
2023-09-14 21:07:53 +02:00
bool build_xml_document ( DOM : : Document & document , ByteBuffer const & data )
2023-04-06 15:50:22 +03:00
{
auto encoding = HTML : : run_encoding_sniffing_algorithm ( document , data ) ;
auto decoder = TextCodec : : decoder_for ( encoding ) ;
VERIFY ( decoder . has_value ( ) ) ;
auto source = decoder - > to_utf8 ( data ) . release_value_but_fixme_should_propagate_errors ( ) ;
XML : : Parser parser ( source , { . resolve_external_resource = resolve_xml_resource } ) ;
XMLDocumentBuilder builder { document } ;
auto result = parser . parse_with_listener ( builder ) ;
return ! result . is_error ( ) & & ! builder . has_error ( ) ;
}
static bool build_video_document ( DOM : : Document & document )
{
2023-11-04 18:42:04 +01:00
auto html_element = DOM : : create_element ( document , HTML : : TagNames : : html , Namespace : : HTML ) . release_value_but_fixme_should_propagate_errors ( ) ;
2023-04-06 15:50:22 +03:00
MUST ( document . append_child ( html_element ) ) ;
2023-11-04 18:42:04 +01:00
auto head_element = DOM : : create_element ( document , HTML : : TagNames : : head , Namespace : : HTML ) . release_value_but_fixme_should_propagate_errors ( ) ;
2023-04-06 15:50:22 +03:00
MUST ( html_element - > append_child ( head_element ) ) ;
2023-11-04 18:42:04 +01:00
auto body_element = DOM : : create_element ( document , HTML : : TagNames : : body , Namespace : : HTML ) . release_value_but_fixme_should_propagate_errors ( ) ;
2023-04-06 15:50:22 +03:00
MUST ( html_element - > append_child ( body_element ) ) ;
2023-11-04 18:42:04 +01:00
auto video_element = DOM : : create_element ( document , HTML : : TagNames : : video , Namespace : : HTML ) . release_value_but_fixme_should_propagate_errors ( ) ;
2023-10-08 11:42:00 +13:00
MUST ( video_element - > set_attribute ( HTML : : AttributeNames : : src , MUST ( document . url ( ) . to_string ( ) ) ) ) ;
MUST ( video_element - > set_attribute ( HTML : : AttributeNames : : autoplay , String { } ) ) ;
MUST ( video_element - > set_attribute ( HTML : : AttributeNames : : controls , String { } ) ) ;
2023-04-06 15:50:22 +03:00
MUST ( body_element - > append_child ( video_element ) ) ;
return true ;
}
2023-06-17 11:46:52 +01:00
static bool build_audio_document ( DOM : : Document & document )
{
2023-11-04 18:42:04 +01:00
auto html_element = DOM : : create_element ( document , HTML : : TagNames : : html , Namespace : : HTML ) . release_value_but_fixme_should_propagate_errors ( ) ;
2023-06-17 11:46:52 +01:00
MUST ( document . append_child ( html_element ) ) ;
2023-11-04 18:42:04 +01:00
auto head_element = DOM : : create_element ( document , HTML : : TagNames : : head , Namespace : : HTML ) . release_value_but_fixme_should_propagate_errors ( ) ;
2023-06-17 11:46:52 +01:00
MUST ( html_element - > append_child ( head_element ) ) ;
2023-11-04 18:42:04 +01:00
auto body_element = DOM : : create_element ( document , HTML : : TagNames : : body , Namespace : : HTML ) . release_value_but_fixme_should_propagate_errors ( ) ;
2023-06-17 11:46:52 +01:00
MUST ( html_element - > append_child ( body_element ) ) ;
2023-11-04 18:42:04 +01:00
auto video_element = DOM : : create_element ( document , HTML : : TagNames : : audio , Namespace : : HTML ) . release_value_but_fixme_should_propagate_errors ( ) ;
2023-10-08 11:42:00 +13:00
MUST ( video_element - > set_attribute ( HTML : : AttributeNames : : src , MUST ( document . url ( ) . to_string ( ) ) ) ) ;
MUST ( video_element - > set_attribute ( HTML : : AttributeNames : : autoplay , String { } ) ) ;
MUST ( video_element - > set_attribute ( HTML : : AttributeNames : : controls , String { } ) ) ;
2023-06-17 11:46:52 +01:00
MUST ( body_element - > append_child ( video_element ) ) ;
return true ;
}
2023-04-06 15:50:22 +03:00
bool parse_document ( DOM : : Document & document , ByteBuffer const & data )
{
auto & mime_type = document . content_type ( ) ;
2023-06-08 11:38:46 -04:00
if ( mime_type = = " text/html " ) {
2023-04-06 15:50:22 +03:00
auto parser = HTML : : HTMLParser : : create_with_uncertain_encoding ( document , data ) ;
parser - > run ( document . url ( ) ) ;
return true ;
}
2023-09-15 21:46:58 +12:00
if ( mime_type . ends_with_bytes ( " +xml " sv ) | | mime_type . is_one_of ( " text/xml " , " application/xml " ) )
2023-04-06 15:50:22 +03:00
return build_xml_document ( document , data ) ;
2023-09-15 21:46:58 +12:00
if ( mime_type . starts_with_bytes ( " image/ " sv ) )
2023-04-06 15:50:22 +03:00
return build_image_document ( document , data ) ;
2023-09-15 21:46:58 +12:00
if ( mime_type . starts_with_bytes ( " video/ " sv ) )
2023-04-06 15:50:22 +03:00
return build_video_document ( document ) ;
2023-09-15 21:46:58 +12:00
if ( mime_type . starts_with_bytes ( " audio/ " sv ) )
2023-06-17 11:46:52 +01:00
return build_audio_document ( document ) ;
2023-04-06 15:50:22 +03:00
if ( mime_type = = " text/plain " | | mime_type = = " application/json " )
return build_text_document ( document , data ) ;
if ( mime_type = = " text/markdown " )
return build_markdown_document ( document , data ) ;
if ( mime_type = = " text/gemini " )
return build_gemini_document ( document , data ) ;
return false ;
}
2023-09-15 13:34:40 +02:00
// Returns true when `mime_type` is one of the types this file checks for:
// HTML, XML (including any "+xml" suffix), image/*, video/*, audio/*,
// plain text, JSON, Markdown or Gemini.
static bool is_supported_document_mime_type(StringView mime_type)
{
    bool const is_xml = mime_type.ends_with(" +xml "sv)
        || mime_type.is_one_of(" text/xml ", " application/xml ");

    bool const is_media = mime_type.starts_with(" image/ "sv)
        || mime_type.starts_with(" video/ "sv)
        || mime_type.starts_with(" audio/ "sv);

    bool const is_text_like = mime_type == " text/html "
        || mime_type == " text/plain "
        || mime_type == " application/json "
        || mime_type == " text/markdown "
        || mime_type == " text/gemini ";

    return is_text_like || is_xml || is_media;
}
2023-04-06 18:10:12 +03:00
// https://html.spec.whatwg.org/multipage/browsing-the-web.html#loading-a-document
//
// Creates a document for the response in `navigation_params` (which must hold
// a value). Returns nullptr when the response's MIME type essence is not
// accepted by is_supported_document_mime_type(). Otherwise returns the new
// document; if the response has a body, reading it is started here and the
// document is populated from the callback once all bytes are available.
JS::GCPtr<DOM::Document> load_document(Optional<HTML::NavigationParams> navigation_params)
{
    VERIFY(navigation_params.has_value());

    // Use the essence of the extracted MIME type, or an empty view when none could be extracted.
    auto maybe_mime_type = navigation_params->response->header_list()->extract_mime_type().release_value_but_fixme_should_propagate_errors();
    StringView essence;
    if (maybe_mime_type.has_value())
        essence = maybe_mime_type.value().essence().bytes_as_string_view();

    if (!is_supported_document_mime_type(essence))
        return nullptr;

    // The document is always created as an HTML document; its content type is then set to the actual essence.
    auto document = DOM::Document::create_and_initialize(DOM::Document::Type::HTML, " text/html ", *navigation_params).release_value_but_fixme_should_propagate_errors();
    document->set_content_type(String::from_utf8(essence).release_value_but_fixme_should_propagate_errors());

    auto& realm = document->realm();

    if (!navigation_params->response->body())
        return document;

    // Invoked with the complete body: build the document, or replace it with an error page on failure.
    auto on_body_read = [document, url = navigation_params->response->url().value()](ByteBuffer bytes) {
        if (!parse_document(*document, bytes)) {
            // Discard whatever the failed builder left behind before parsing the error page.
            document->remove_all_children(true);
            auto error_html = load_error_page(url).release_value_but_fixme_should_propagate_errors();
            auto error_parser = HTML::HTMLParser::create(document, error_html, " utf-8 ");
            document->set_url(AK::URL(" about:error "));
            error_parser->run();
        }
    };

    // Invoked when reading the body fails; currently this only logs.
    auto on_body_error = [](auto) {
        dbgln(" FIXME: Load html page with an error if read of body failed. ");
    };

    navigation_params->response->body()->fully_read(
                                           realm,
                                           move(on_body_read),
                                           move(on_body_error),
                                           JS::NonnullGCPtr { realm.global_object() })
        .release_value_but_fixme_should_propagate_errors();

    return document;
}
// https://html.spec.whatwg.org/multipage/document-lifecycle.html#read-ua-inline
//
// Creates a new HTML document whose contents come from `content_html` rather
// than from a fetched response. The document is built with freshly created
// navigation params for `navigable` / `navigation_id`, and its URL is set to
// "about:error" before the markup is parsed.
JS::GCPtr<DOM::Document> create_document_for_inline_content(JS::GCPtr<HTML::Navigable> navigable, Optional<String> navigation_id, StringView content_html)
{
    auto& vm = navigable->vm();

    // 1. Let origin be a new opaque origin.
    auto opaque_origin = HTML::Origin {};

    // 2. Let coop be a new cross-origin opener policy.
    HTML::CrossOriginOpenerPolicy opener_policy {};

    // 3. Let coopEnforcementResult be a new cross-origin opener policy enforcement result with
    //    url: response's URL
    //    origin: origin
    //    cross-origin opener policy: coop
    HTML::CrossOriginOpenerPolicyEnforcementResult enforcement_result {
        .url = AK::URL(" about:error "), // AD-HOC
        .origin = opaque_origin,
        .cross_origin_opener_policy = opener_policy
    };

    // 4. Let navigationParams be a new navigation params with
    //    id: navigationId
    //    navigable: navigable
    //    request: null
    //    response: a new response
    //    origin: origin
    //    fetch controller: null
    //    commit early hints: null
    //    COOP enforcement result: coopEnforcementResult
    //    reserved environment: null
    //    policy container: a new policy container
    //    final sandboxing flag set: an empty set
    //    cross-origin opener policy: coop
    //    FIXME: navigation timing type: navTimingType
    //    about base URL: null
    auto fresh_response = Fetch::Infrastructure::Response::create(vm);
    fresh_response->url_list().append(AK::URL(" about:error ")); // AD-HOC: https://github.com/whatwg/html/issues/9122

    // The enforcement result above holds copies of the origin and policy, so both can be moved from here.
    HTML::NavigationParams navigation_params {
        .id = navigation_id,
        .navigable = navigable,
        .request = {},
        .response = *fresh_response,
        .fetch_controller = nullptr,
        .commit_early_hints = nullptr,
        .coop_enforcement_result = move(enforcement_result),
        .reserved_environment = {},
        .origin = move(opaque_origin),
        .policy_container = HTML::PolicyContainer {},
        .final_sandboxing_flag_set = HTML::SandboxingFlagSet {},
        .cross_origin_opener_policy = move(opener_policy),
        .about_base_url = {},
    };

    // 5. Let document be the result of creating and initializing a Document object given "html", "text/html", and navigationParams.
    auto document = DOM::Document::create_and_initialize(DOM::Document::Type::HTML, " text/html ", navigation_params).release_value_but_fixme_should_propagate_errors();

    // 6. Either associate document with a custom rendering that is not rendered using the normal Document rendering rules,
    //    or mutate document until it represents the content the user agent wants to render.
    auto html_parser = HTML::HTMLParser::create(document, content_html, " utf-8 ");
    document->set_url(AK::URL(" about:error "));
    html_parser->run();

    // 7. Return document.
    return document;
}
2023-04-06 15:50:22 +03:00
}