HTMLStreamer/Tests/HTMLStreamerTests/TokenizerTests.swift

85 lines
4.0 KiB
Swift

//
// TokenizerTests.swift
//
//
// Created by Shadowfacts on 11/22/23.
//
import XCTest
@testable import HTMLStreamer
/// Tests that feed HTML snippets to `Tokenizer` and compare the emitted
/// tokens against the expected sequence.
final class TokenizerTests: XCTestCase {
    /// Runs `Tokenizer` over the characters of `s` and collects every token it yields.
    ///
    /// - Parameter s: The HTML source to tokenize.
    /// - Returns: All tokens produced by the tokenizer, in emission order.
    private func tokenize(_ s: String) -> [Token] {
        let iterator = Tokenizer(chars: s.makeIterator())
        // Swap in the line below to print each token as it is produced while debugging.
//        let iterator = PrintIterator(inner: Tokenizer(chars: s.makeIterator()))
        return Array(AnySequence({ iterator }))
    }

    /// Named character references in text content and in attribute values.
    func testNamedCharacterReferences() {
        // A properly terminated reference. The input must be the escaped form:
        // a bare "&" would pass without exercising reference decoding at all.
        XCTAssertEqual(tokenize("&amp;"), [.character("&")])
        // A terminated reference immediately followed by plain text.
        XCTAssertEqual(tokenize("&not;in"), [.character("¬"), .character("i"), .character("n")])
        // missing-semicolon-after-character-reference:
        XCTAssertEqual(tokenize("&notin"), [.character("¬"), .character("i"), .character("n")])
        // unknown-named-character-reference:
        XCTAssertEqual(tokenize("&notit;"), [.character("¬"), .character("i"), .character("t"), .character(";")])
        XCTAssertEqual(tokenize("&asdf"), "&asdf".map { .character($0) })
        XCTAssertEqual(tokenize("&a"), "&a".map { .character($0) })
        // attribute special case: the unterminated reference is expected to stay literal in the value
        XCTAssertEqual(tokenize("<a a='&nota' />"), [.startTag("a", selfClosing: true, attributes: [Attribute(name: "a", value: "&nota")])])
    }

    /// Decimal and hexadecimal numeric character references.
    func testNumericCharacterReference() {
        XCTAssertEqual(tokenize("&#33;"), [.character("!")])
        XCTAssertEqual(tokenize("&#x21;"), [.character("!")])
    }

    /// Start tags: plain, self-closing, and with each attribute quoting style.
    func testStartTag() {
        XCTAssertEqual(tokenize("<asdf>"), [.startTag("asdf", selfClosing: false, attributes: [])])
        XCTAssertEqual(tokenize("<asdf/>"), [.startTag("asdf", selfClosing: true, attributes: [])])
        XCTAssertEqual(tokenize("<asdf />"), [.startTag("asdf", selfClosing: true, attributes: [])])
        // double-quoted attributes
        XCTAssertEqual(tokenize("<asdf a=\"b\" />"), [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: "b")])])
        XCTAssertEqual(tokenize("<asdf a=\"&amp;\" />"), [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: "&")])])
        // single-quoted attributes
        XCTAssertEqual(tokenize("<asdf a='b' />"), [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: "b")])])
        XCTAssertEqual(tokenize("<asdf a='&#x20;' />"), [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: " ")])])
        // unquoted attributes
        XCTAssertEqual(tokenize("<asdf a=b />"), [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: "b")])])
        XCTAssertEqual(tokenize("<asdf a=&#32; />"), [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: " ")])])
    }

    /// End tags, including one written with attributes; the expected token carries only the name.
    func testEndTag() {
        XCTAssertEqual(tokenize("</asdf>"), [.endTag("asdf")])
        XCTAssertEqual(tokenize("</asdf a b='c'>"), [.endTag("asdf")])
    }

    /// Comments: one well-formed, one with a malformed opener, one with a malformed closer.
    func testComment() {
        XCTAssertEqual(tokenize("<!-- hello -->"), [.comment(" hello ")])
        // Opener has a single dash: everything after "<!" up to ">" is expected as the comment text.
        XCTAssertEqual(tokenize("<!- hello -->"), [.comment("- hello --")])
        // Closer has a single dash: the input ends without "-->", so the rest is expected as the comment text.
        XCTAssertEqual(tokenize("<!-- hello ->"), [.comment(" hello ->")])
    }

    /// Doctypes with no identifiers, a public identifier, and both public and system identifiers.
    func testDoctype() {
        XCTAssertEqual(tokenize("<!DOCTYPE html>"), [.doctype("html", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)])
        // The uppercase name "HTML" is expected to come back lowercased.
        XCTAssertEqual(tokenize(#"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN">"#), [.doctype("html", forceQuirks: false, publicIdentifier: "-//W3C//DTD HTML 4.01//EN", systemIdentifier: nil)])
        XCTAssertEqual(tokenize(#"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">"#), [.doctype("html", forceQuirks: false, publicIdentifier: "-//W3C//DTD HTML 4.01//EN", systemIdentifier: "http://www.w3.org/TR/html4/strict.dtd")])
    }
}
/// Debugging wrapper around another iterator: forwards every call to `inner`
/// and prints each result (including the final `nil`) before handing it back.
private struct PrintIterator<Inner: IteratorProtocol>: IteratorProtocol {
    typealias Element = Inner.Element

    /// The iterator whose output is being traced.
    var inner: Inner

    /// Advances `inner`, prints the optional result, and returns it unchanged.
    mutating func next() -> Inner.Element? {
        let produced = inner.next()
        // Runs once `produced` has been chosen as the return value; the printed
        // text is the description of the Optional itself.
        defer { print(String(describing: produced)) }
        return produced
    }
}