// HTMLStreamer/Tests/HTMLStreamerTests/TokenizerTests.swift
// (repository viewer listing: 95 lines, 4.5 KiB, Swift)

//
// TokenizerTests.swift
//
//
// Created by Shadowfacts on 11/22/23.
//
import XCTest
@testable import HTMLStreamer
/// Tests for `Tokenizer`: each case feeds a small HTML snippet through the
/// tokenizer and compares the produced tokens with the expected `Token` values.
final class TokenizerTests: XCTestCase {

    /// Runs `Tokenizer` over the Unicode scalars of `s` and collects every token it yields.
    ///
    /// - Parameter s: The HTML source text to tokenize.
    /// - Returns: All tokens, in the order the tokenizer produced them.
    private func tokenize(_ s: String) -> [Token] {
        let tokenizer = Tokenizer(chars: s.unicodeScalars.makeIterator())
        // Debugging aid: wrap the tokenizer in `PrintIterator` to print each token as it is produced.
        // let tokenizer = PrintIterator(inner: Tokenizer(chars: s.unicodeScalars.makeIterator()))
        return Array(AnySequence { tokenizer })
    }

    /// Named character references in text content, plus the attribute-value special case.
    func testNamedCharacterReferences() {
        // A lone ampersand is expected to come through unchanged.
        XCTAssertEqual(tokenize("&"), [.characterSequence("&")])
        // A properly terminated reference is decoded.
        XCTAssertEqual(tokenize("&amp;"), [.characterSequence("&")])
        // Terminated reference followed by plain text: the input must contain the
        // reference itself (`&not;`), not the already-decoded character, otherwise
        // nothing about character references is exercised here.
        XCTAssertEqual(tokenize("&not;in"), [.characterSequence("¬"), .characterSequence("in")])
        // missing-semicolon-after-character-reference:
        XCTAssertEqual(tokenize("&notin"), [.characterSequence("¬"), .characterSequence("in")])
        // unknown-named-character-reference:
        XCTAssertEqual(tokenize("&notit;"), [.characterSequence("¬"), .characterSequence("it;")])
        XCTAssertEqual(tokenize("&asdf"), [.characterSequence("&asdf")])
        XCTAssertEqual(tokenize("&a"), [.characterSequence("&a")])
        // attribute special case: inside an attribute value `&nota` is expected to stay undecoded
        XCTAssertEqual(tokenize("<a a='&nota' />"), [.startTag("a", selfClosing: true, attributes: [Attribute(name: "a", value: "&nota")])])
    }

    /// Decimal and hexadecimal numeric references (upper- and lowercase hex digits).
    func testNumericCharacterReference() {
        XCTAssertEqual(tokenize("&#33;"), [.characterSequence("!")])
        XCTAssertEqual(tokenize("&#x21;"), [.characterSequence("!")])
        XCTAssertEqual(tokenize("&#x4A;"), [.characterSequence("J")])
        XCTAssertEqual(tokenize("&#x4a;"), [.characterSequence("J")])
    }

    /// Start tags: plain, self-closing, and with each attribute quoting style.
    func testStartTag() {
        XCTAssertEqual(tokenize("<asdf>"), [.startTag("asdf", selfClosing: false, attributes: [])])
        XCTAssertEqual(tokenize("<asdf/>"), [.startTag("asdf", selfClosing: true, attributes: [])])
        XCTAssertEqual(tokenize("<asdf />"), [.startTag("asdf", selfClosing: true, attributes: [])])
        // double-quoted attributes
        XCTAssertEqual(tokenize("<asdf a=\"b\" />"), [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: "b")])])
        XCTAssertEqual(tokenize("<asdf a=\"&amp;\" />"), [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: "&")])])
        // single-quoted attributes
        XCTAssertEqual(tokenize("<asdf a='b' />"), [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: "b")])])
        XCTAssertEqual(tokenize("<asdf a='&#x20;' />"), [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: " ")])])
        // unquoted attributes
        XCTAssertEqual(tokenize("<asdf a=b />"), [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: "b")])])
        XCTAssertEqual(tokenize("<asdf a=&#32; />"), [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: " ")])])
    }

    /// End tags; attributes written on an end tag are expected not to appear in the token.
    func testEndTag() {
        XCTAssertEqual(tokenize("</asdf>"), [.endTag("asdf")])
        XCTAssertEqual(tokenize("</asdf a b='c'>"), [.endTag("asdf")])
    }

    /// Well-formed comments plus two malformed openers/closers.
    func testComment() {
        XCTAssertEqual(tokenize("<!-- hello -->"), [.comment(" hello ")])
        // Malformed opener (`<!-`): everything up to `>` is expected as the comment text.
        XCTAssertEqual(tokenize("<!- hello -->"), [.comment("- hello --")])
        // Malformed closer (`->`): the comment is expected to run to the end of input.
        XCTAssertEqual(tokenize("<!-- hello ->"), [.comment(" hello ->")])
    }

    /// DOCTYPE declarations with and without public/system identifiers.
    /// The uppercase name `HTML` is expected to be reported as `html`.
    func testDoctype() {
        XCTAssertEqual(tokenize("<!DOCTYPE html>"), [.doctype("html", forceQuirks: false, publicIdentifier: nil, systemIdentifier: nil)])
        XCTAssertEqual(tokenize(#"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN">"#), [.doctype("html", forceQuirks: false, publicIdentifier: "-//W3C//DTD HTML 4.01//EN", systemIdentifier: nil)])
        XCTAssertEqual(tokenize(#"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">"#), [.doctype("html", forceQuirks: false, publicIdentifier: "-//W3C//DTD HTML 4.01//EN", systemIdentifier: "http://www.w3.org/TR/html4/strict.dtd")])
    }

    /// A character made of several Unicode scalars (a flag emoji) is expected
    /// to come out as one character sequence containing both scalars.
    func testMultiScalar() {
        XCTAssertEqual(tokenize("🇺🇸"), [.characterSequence("\u{1F1FA}\u{1F1F8}")])
    }

    /// Whitespace between an attribute name and `>` yields the attribute with an empty value.
    func testWhitespaceAfterAttributeName() {
        XCTAssertEqual(tokenize("<a foo >"), [.startTag("a", selfClosing: false, attributes: [.init(name: "foo", value: "")])])
    }
}
/// Debugging wrapper around another iterator: forwards every call to `inner`
/// and prints each result (including the final `nil`) before handing it back.
private struct PrintIterator<Inner: IteratorProtocol>: IteratorProtocol {
    typealias Element = Inner.Element

    /// The wrapped iterator whose elements are printed and passed through.
    var inner: Inner

    /// Advances `inner`, prints the optional result, and returns it unchanged.
    mutating func next() -> Element? {
        let element = inner.next()
        print(String(describing: element))
        return element
    }
}