/*
 * Copyright (c) 2024, Andrew Kaster <andrew@ladybird.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

import AK
import Testing
import Web

@Suite
struct TestHTMLTokenizerSwift {

    @Test func tokenTypes() {
        let default_token = HTMLToken()
        default_token.type = .Character(codePoint: "a")
        #expect(default_token.isCharacter())

        #expect("\(default_token)" == "HTMLToken(type: Character(codePoint: a))")
    }

    @Test func parserWhitespace() {
        for codePoint: Character in ["\t", "\n", "\r", "\u{000C}", " "] {
            let token = HTMLToken(type: .Character(codePoint: codePoint))
            #expect(token.isParserWhitespace())
        }

        for codePoint: Character in ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"] {
            let token = HTMLToken(type: .Character(codePoint: codePoint))
            #expect(!token.isParserWhitespace())
        }
    }

    @Test func dataStateNoInput() {
        let tokenizer = HTMLTokenizer()
        #expect(tokenizer.state == HTMLTokenizer.State.Data)  // initial state

        let token = tokenizer.nextToken()
        #expect(token?.type == .EndOfFile)

        let token2 = tokenizer.nextToken()
        #expect(token2 == nil)
        #expect(tokenizer.state == HTMLTokenizer.State.Data)
    }

    @Test func dataStateSingleChar() {
        guard let tokenizer = HTMLTokenizer(input: "X") else {
            Issue.record("Failed to create tokenizer for 'X'")
            return
        }
        #expect(tokenizer.state == HTMLTokenizer.State.Data)  // initial state

        let token = tokenizer.nextToken()
        #expect(token?.type == .Character(codePoint: "X"))

        let token2 = tokenizer.nextToken()
        #expect(token2?.type == .EndOfFile)

        let token3 = tokenizer.nextToken()
        #expect(token3 == nil)
        #expect(tokenizer.state == HTMLTokenizer.State.Data)
    }

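    // A lone "&" that does not begin a named or numeric character reference is
    // flushed back out of the character reference states as a literal character token.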
    @Test func dataStateAmpersand() {
        guard let tokenizer = HTMLTokenizer(input: "&") else {
            Issue.record("Failed to create tokenizer for '&'")
            return
        }
        #expect(tokenizer.state == HTMLTokenizer.State.Data)  // initial state

        let token = tokenizer.nextToken()
        #expect(token?.type == .Character(codePoint: "&"))

        let token2 = tokenizer.nextToken()
        #expect(token2?.type == .EndOfFile)

        let token3 = tokenizer.nextToken()
        #expect(token3 == nil)
    }

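    // A "<" followed by end of input is emitted as a plain character token,
    // leaving the tokenizer parked in the tag open state.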
    @Test func tagOpenOnly() {
        guard let tokenizer = HTMLTokenizer(input: "<") else {
            Issue.record("Failed to create tokenizer for '<'")
            return
        }
        #expect(tokenizer.state == HTMLTokenizer.State.Data)  // initial state

        let token = tokenizer.nextToken()
        #expect(token?.type == .Character(codePoint: "<"))

        let token2 = tokenizer.nextToken()
        #expect(token2?.type == .EndOfFile)
        #expect(tokenizer.state == HTMLTokenizer.State.TagOpen)

        let token3 = tokenizer.nextToken()
        #expect(token3 == nil)
    }

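    // A U+0000 NUL byte in the input is expected to surface as a
    // U+FFFD REPLACEMENT CHARACTER token.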
    @Test func dataStateNulChar() {
        guard let tokenizer = HTMLTokenizer(input: "H\0I") else {
            Issue.record("Failed to create tokenizer for 'H\\0I'")
            return
        }
        #expect(tokenizer.state == HTMLTokenizer.State.Data)  // initial state

        let token = tokenizer.nextToken()
        #expect(token?.type == .Character(codePoint: "H"))

        let token2 = tokenizer.nextToken()
        #expect(token2?.type == .Character(codePoint: "\u{FFFD}"))

        let token3 = tokenizer.nextToken()
        #expect(token3?.type == .Character(codePoint: "I"))

        let token4 = tokenizer.nextToken()
        #expect(token4?.type == .EndOfFile)

        #expect(tokenizer.state == HTMLTokenizer.State.Data)
    }

    @Test func scriptTagWithAttributes() {
        guard let tokenizer = HTMLTokenizer(input: "<script type=\"text/javascript\">") else {
            Issue.record("Failed to create tokenizer for '<script type=\"text/javascript\">'")
            return
        }
        #expect(tokenizer.state == HTMLTokenizer.State.Data)  // initial state

        let token = tokenizer.nextToken()
        #expect(token?.type == .StartTag(tagName: "script", attributes: [HTMLToken.Attribute(localName: "type", value: "text/javascript")]))

        let token2 = tokenizer.nextToken()
        #expect(token2?.type == .EndOfFile)

        #expect(tokenizer.state == HTMLTokenizer.State.Data)
    }

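    // With no tree builder driving a switch into the script data state, the
    // element's content is expected to come out as ordinary character tokens.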
    @Test func scriptWithContent() {
        guard let tokenizer = HTMLTokenizer(input: "<script>var x = 1;</script>") else {
            Issue.record("Failed to create tokenizer for '<script>var x = 1;</script>'")
            return
        }

        let token = tokenizer.nextToken()
        #expect(token?.type == .StartTag(tagName: "script", attributes: []))

        for codePoint in "var x = 1;" {
            let token = tokenizer.nextToken()
            #expect(token?.type == .Character(codePoint: codePoint))
        }

        let token2 = tokenizer.nextToken()
        #expect(token2?.type == .EndTag(tagName: "script"))

        let token3 = tokenizer.nextToken()
        #expect(token3?.type == .EndOfFile)
    }

    @Test func simpleDivWithContent() {
        guard let tokenizer = HTMLTokenizer(input: "<div>hi</div>") else {
            Issue.record("Failed to create tokenizer for '<div>hi</div>'")
            return
        }
        #expect(tokenizer.state == HTMLTokenizer.State.Data)  // initial state

        let token = tokenizer.nextToken()
        #expect(token?.type == .StartTag(tagName: "div", attributes: []))

        let token2 = tokenizer.nextToken()
        #expect(token2?.type == .Character(codePoint: "h"))

        let token3 = tokenizer.nextToken()
        #expect(token3?.type == .Character(codePoint: "i"))

        let token4 = tokenizer.nextToken()
        #expect(token4?.type == .EndTag(tagName: "div"))

        let token5 = tokenizer.nextToken()
        #expect(token5?.type == .EndOfFile)
    }

    @Test func simpleDivWithContentAndAttributes() {
        guard let tokenizer = HTMLTokenizer(input: "<div class=\"foo\">hi</div>") else {
            Issue.record("Failed to create tokenizer for '<div class=\"foo\">hi</div>'")
            return
        }
        #expect(tokenizer.state == HTMLTokenizer.State.Data)  // initial state

        let token = tokenizer.nextToken()
        #expect(token?.type == .StartTag(tagName: "div", attributes: [HTMLToken.Attribute(localName: "class", value: "foo")]))

        let token2 = tokenizer.nextToken()
        #expect(token2?.type == .Character(codePoint: "h"))

        let token3 = tokenizer.nextToken()
        #expect(token3?.type == .Character(codePoint: "i"))

        let token4 = tokenizer.nextToken()
        #expect(token4?.type == .EndTag(tagName: "div"))

        let token5 = tokenizer.nextToken()
        #expect(token5?.type == .EndOfFile)
    }

    @Test func severalDivsWithAttributesAndContent() {
        // Explicitly use unquoted and single quotes for attribute values
        guard let tokenizer = HTMLTokenizer(input: "<div class=foo>hi</div><div class='bar'>bye</div>") else {
            Issue.record("Failed to create tokenizer for '<div class=foo>hi</div><div class='bar'>bye</div>'")
            return
        }

        let token = tokenizer.nextToken()
        #expect(token?.type == .StartTag(tagName: "div", attributes: [HTMLToken.Attribute(localName: "class", value: "foo")]))

        for codePoint in "hi" {
            let token = tokenizer.nextToken()
            #expect(token?.type == .Character(codePoint: codePoint))
        }

        let token2 = tokenizer.nextToken()
        #expect(token2?.type == .EndTag(tagName: "div"))

        let token3 = tokenizer.nextToken()
        #expect(token3?.type == .StartTag(tagName: "div", attributes: [HTMLToken.Attribute(localName: "class", value: "bar")]))

        for codePoint in "bye" {
            let token = tokenizer.nextToken()
            #expect(token?.type == .Character(codePoint: codePoint))
        }

        let token4 = tokenizer.nextToken()
        #expect(token4?.type == .EndTag(tagName: "div"))

        let token5 = tokenizer.nextToken()
        #expect(token5?.type == .EndOfFile)
    }

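    // The tokenizer still records attributes that appear on an end tag
    // (the HTML spec treats this as an end-tag-with-attributes parse error).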
    @Test func startTagWithMultipleAttributes() {
        guard let tokenizer = HTMLTokenizer(input: "<div class=\"foo\" id=\"bar\">hi</div attr=endTagAttributeWhee>") else {
            Issue.record("Failed to create tokenizer for '<div class=\"foo\" id=\"bar\">hi</div attr=endTagAttributeWhee>'")
            return
        }

        let token = tokenizer.nextToken()
        #expect(token?.type == .StartTag(tagName: "div", attributes: [HTMLToken.Attribute(localName: "class", value: "foo"), HTMLToken.Attribute(localName: "id", value: "bar")]))

        for codePoint in "hi" {
            let token = tokenizer.nextToken()
            #expect(token?.type == .Character(codePoint: codePoint))
        }

        let token2 = tokenizer.nextToken()
        #expect(token2?.type == .EndTag(tagName: "div", attributes: [HTMLToken.Attribute(localName: "attr", value: "endTagAttributeWhee")]))

        let token3 = tokenizer.nextToken()
        #expect(token3?.type == .EndOfFile)
    }

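    // "<?" is not valid HTML; it is handled via the bogus comment state,
    // so the whole XML declaration comes out as a single comment token.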
    @Test func xmlDeclaration() {
        guard let tokenizer = HTMLTokenizer(input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>") else {
            Issue.record("Failed to create tokenizer for '<?xml version=\"1.0\" encoding=\"UTF-8\"?>'")
            return
        }

        let token = tokenizer.nextToken()
        #expect(token?.type == .Comment(data: "?xml version=\"1.0\" encoding=\"UTF-8\"?"))

        let token2 = tokenizer.nextToken()
        #expect(token2?.type == .EndOfFile)
    }

    @Test func simpleComment() {
        guard let tokenizer = HTMLTokenizer(input: "<!-- comment -->") else {
            Issue.record("Failed to create tokenizer for '<!-- comment -->'")
            return
        }

        let token = tokenizer.nextToken()
        #expect(token?.type == .Comment(data: " comment "))

        let token2 = tokenizer.nextToken()
        #expect(token2?.type == .EndOfFile)
    }

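    // HTML comments do not nest: the first "-->" closes the comment and the
    // trailing " -->" is emitted as individual character tokens.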
    @Test func nestedComment() {
        guard let tokenizer = HTMLTokenizer(input: "<!-- <!-- nested --> -->") else {
            Issue.record("Failed to create tokenizer for '<!-- <!-- nested --> -->'")
            return
        }

        let token = tokenizer.nextToken()
        #expect(token?.type == .Comment(data: " <!-- nested "))

        for codePoint in " -->" {
            let token = tokenizer.nextToken()
            #expect(token?.type == .Character(codePoint: codePoint))
        }

        let token2 = tokenizer.nextToken()
        #expect(token2?.type == .EndOfFile)
    }

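    // Markup inside a comment is not tokenized; it stays part of the comment data.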
    @Test func commentWithScriptTagInside() {
        guard let tokenizer = HTMLTokenizer(input: "<!-- <script>var x = 1;</script> -->") else {
            Issue.record("Failed to create tokenizer for '<!-- <script>var x = 1;</script> -->'")
            return
        }

        let token = tokenizer.nextToken()
        #expect(token?.type == .Comment(data: " <script>var x = 1;</script> "))

        let token2 = tokenizer.nextToken()
        #expect(token2?.type == .EndOfFile)
    }
}