//
//  TokenizerTests.swift
//
//
//  Created by Shadowfacts on 11/22/23.
//

import XCTest
@testable import HTMLStreamer

/// Unit tests that feed small string inputs to `Tokenizer` and compare the
/// produced tokens against an expected `[Token]` list.
///
/// NOTE(review): many inputs below are empty string literals (or look like
/// already-decoded entities) even though the expected tokens describe tags,
/// comments, or character references. The markup may have been stripped from
/// this copy of the file — confirm the literals against version control.
final class TokenizerTests: XCTestCase {

    /// Tokenizes `s` and returns every token the tokenizer yields, in order.
    ///
    /// - Parameter s: The input text; its Unicode scalars are handed to `Tokenizer`.
    /// - Returns: All tokens produced until the tokenizer is exhausted.
    private func tokenize(_ s: String) -> [Token] {
        let scalars = s.unicodeScalars.makeIterator()
        let tokenizer = Tokenizer(chars: scalars)
        // IteratorSequence drains the iterator directly, without a type-erasing wrapper.
        return Array(IteratorSequence(tokenizer))
    }

    /// Covers named character references, including the cases labelled
    /// missing-semicolon, unknown-reference, and the attribute special case.
    func testNamedCharacterReferences() {
        XCTAssertEqual(
            tokenize("&"),
            [.characterSequence("&")]
        )

        // missing-semicolon-after-character-reference:
        XCTAssertEqual(
            tokenize("¬in"),
            [.characterSequence("¬"), .characterSequence("in")]
        )
        XCTAssertEqual(
            tokenize("¬in"),
            [.characterSequence("¬"), .characterSequence("in")]
        )

        // unknown-named-character-reference:
        XCTAssertEqual(
            tokenize("¬it;"),
            [.characterSequence("¬"), .characterSequence("it;")]
        )
        XCTAssertEqual(
            tokenize("&asdf"),
            [.characterSequence("&asdf")]
        )
        XCTAssertEqual(
            tokenize("&a"),
            [.characterSequence("&a")]
        )

        // attribute special case
        XCTAssertEqual(
            tokenize(""),
            [.startTag("a", selfClosing: true, attributes: [Attribute(name: "a", value: "¬a")])]
        )
    }

    /// Covers numeric character references.
    func testNumericCharacterReference() {
        XCTAssertEqual(
            tokenize("!"),
            [.characterSequence("!")]
        )
        XCTAssertEqual(
            tokenize("!"),
            [.characterSequence("!")]
        )
        XCTAssertEqual(
            tokenize("J"),
            [.characterSequence("J")]
        )
        XCTAssertEqual(
            tokenize("J"),
            [.characterSequence("J")]
        )
    }

    /// Covers start tags: plain, self-closing, and with double-quoted,
    /// single-quoted, and unquoted attribute values.
    func testStartTag() {
        XCTAssertEqual(
            tokenize(""),
            [.startTag("asdf", selfClosing: false, attributes: [])]
        )
        XCTAssertEqual(
            tokenize(""),
            [.startTag("asdf", selfClosing: true, attributes: [])]
        )
        XCTAssertEqual(
            tokenize(""),
            [.startTag("asdf", selfClosing: true, attributes: [])]
        )

        // double-quoted attributes
        XCTAssertEqual(
            tokenize(""),
            [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: "b")])]
        )
        XCTAssertEqual(
            tokenize(""),
            [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: "&")])]
        )

        // single-quoted attributes
        XCTAssertEqual(
            tokenize(""),
            [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: "b")])]
        )
        XCTAssertEqual(
            tokenize(""),
            [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: " ")])]
        )

        // unquoted attributes
        XCTAssertEqual(
            tokenize(""),
            [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: "b")])]
        )
        XCTAssertEqual(
            tokenize(""),
            [.startTag("asdf", selfClosing: true, attributes: [.init(name: "a", value: " ")])]
        )
    }

    /// Covers end tags.
    func testEndTag() {
        XCTAssertEqual(
            tokenize(""),
            [.endTag("asdf")]
        )
        XCTAssertEqual(
            tokenize(""),
            [.endTag("asdf")]
        )
    }

    func testComment() {
        XCTAssertEqual(
            tokenize(""),
            [.comment(" hello ")]
        )
        XCTAssertEqual(
            tokenize(""),
            [.comment("- hello --")]
        )
        XCTAssertEqual(tokenize("