mirror of
				https://github.com/LadybirdBrowser/ladybird.git
				synced 2025-11-04 07:10:57 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			105 lines
		
	
	
	
		
			3.5 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			105 lines
		
	
	
	
		
			3.5 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/*
 | 
						|
 * Copyright (c) 2020, Andreas Kling <andreas@ladybird.org>
 | 
						|
 *
 | 
						|
 * SPDX-License-Identifier: BSD-2-Clause
 | 
						|
 */
 | 
						|
 | 
						|
#include <AK/HashTable.h>
 | 
						|
#include <LibWeb/HTML/Parser/HTMLToken.h>
 | 
						|
 | 
						|
namespace Web::HTML {
 | 
						|
 | 
						|
// Builds a textual description of this token: the type label, followed by the
// type-specific payload (DOCTYPE name, tag name and attributes, comment data or
// code point), followed by an "@line:column" source position suffix.
String HTMLToken::to_string() const
{
    // NOTE(review): type() is assumed to be a side-effect-free getter, so it is read once here.
    auto const token_type = type();
    auto const is_tag_token = token_type == Type::StartTag || token_type == Type::EndTag;

    StringBuilder description;

    // Type label. A DOCTYPE token additionally gets its name appended right away.
    switch (token_type) {
    case Type::DOCTYPE:
        description.append("DOCTYPE"sv);
        description.append(" { name: '"sv);
        description.append(doctype_data().name);
        description.append("' }"sv);
        break;
    case Type::StartTag:
        description.append("StartTag"sv);
        break;
    case Type::EndTag:
        description.append("EndTag"sv);
        break;
    case Type::Comment:
        description.append("Comment"sv);
        break;
    case Type::Character:
        description.append("Character"sv);
        break;
    case Type::EndOfFile:
        description.append("EndOfFile"sv);
        break;
    case Type::Invalid:
        VERIFY_NOT_REACHED();
    }

    // Start and end tags: the tag name, then every attribute as name="value".
    if (is_tag_token) {
        description.append(" { name: '"sv);
        description.append(tag_name());
        description.append("', { "sv);
        for_each_attribute([&](auto const& attr) {
            description.append(attr.local_name);
            description.append("=\""sv);
            description.append(attr.value);
            description.append("\" "sv);
            return IterationDecision::Continue;
        });
        description.append("} }"sv);
    }

    // Comment tokens: the comment text.
    if (is_comment()) {
        description.append(" { data: '"sv);
        description.append(comment());
        description.append("' }"sv);
    }

    // Character tokens: the code point itself.
    if (is_character()) {
        description.append(" { data: '"sv);
        description.append_code_point(code_point());
        description.append("' }"sv);
    }

    // Position suffix: character tokens print only their start position,
    // every other token prints the start-end range.
    if (token_type != Type::Character) {
        description.appendff("@{}:{}-{}:{}", m_start_position.line, m_start_position.column, m_end_position.line, m_end_position.column);
    } else {
        description.appendff("@{}:{}", m_start_position.line, m_start_position.column);
    }

    return MUST(description.to_string());
}
 | 
						|
 | 
						|
void HTMLToken::normalize_attributes()
 | 
						|
{
 | 
						|
    // From AttributeNameState: https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
 | 
						|
    //
 | 
						|
    // When the user agent leaves the attribute name state (and before emitting the tag token, if appropriate),
 | 
						|
    // the complete attribute's name must be compared to the other attributes on the same token;
 | 
						|
    // if there is already an attribute on the token with the exact same name, then this is a duplicate-attribute
 | 
						|
    // parse error and the new attribute must be removed from the token.
 | 
						|
 | 
						|
    // NOTE: If an attribute is so removed from a token, it, and the value that gets associated with it, if any,
 | 
						|
    // are never subsequently used by the parser, and are therefore effectively discarded. Removing the attribute
 | 
						|
    // in this way does not change its status as the "current attribute" for the purposes of the tokenizer, however.
 | 
						|
 | 
						|
    HashTable<FlyString> seen_attributes;
 | 
						|
    auto* ptr = tag_attributes();
 | 
						|
    if (!ptr)
 | 
						|
        return;
 | 
						|
    auto& tag_attributes = *ptr;
 | 
						|
    for (size_t i = 0; i < tag_attributes.size(); ++i) {
 | 
						|
        auto& attribute = tag_attributes[i];
 | 
						|
        if (seen_attributes.set(attribute.local_name, AK::HashSetExistingEntryBehavior::Keep) == AK::HashSetResult::KeptExistingEntry) {
 | 
						|
            // This is a duplicate attribute, remove it.
 | 
						|
            tag_attributes.remove(i);
 | 
						|
            --i;
 | 
						|
        }
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
}
 |